From 874b3fae8e6ca22d6bc12e47103178e800f8e10c Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sun, 28 Jan 2018 11:47:32 -0500 Subject: [PATCH 001/355] added latency/throughput to test --- test.sh | 4 ++-- torch2trt/test.py | 36 +++++++++++++++++++++++++++++------- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/test.sh b/test.sh index cfc1ccfa..3706bb90 100644 --- a/test.sh +++ b/test.sh @@ -2,8 +2,8 @@ OUTPUT_FILE=$1 touch $OUTPUT_FILE -echo "| Name | Data Type | Input Shapes | torch2trt kwargs | Max Error | FPS (PyTorch) | FPS (TensorRT) |" >> $OUTPUT_FILE -echo "|------|-----------|--------------|------------------|-----------|---------------|----------------|" >> $OUTPUT_FILE +echo "| Name | Data Type | Input Shapes | torch2trt kwargs | Max Error | Throughput (PyTorch) | Throughput (TensorRT) | Latency (PyTorch) | Latency (TensorRT) |" >> $OUTPUT_FILE +echo "|------|-----------|--------------|------------------|-----------|----------------------|-----------------------|-------------------|--------------------|" >> $OUTPUT_FILE python3 -m torch2trt.test -o $OUTPUT_FILE --name alexnet python3 -m torch2trt.test -o $OUTPUT_FILE --name squeezenet1_0 diff --git a/torch2trt/test.py b/torch2trt/test.py index 4583bb89..9f52ad08 100644 --- a/torch2trt/test.py +++ b/torch2trt/test.py @@ -45,25 +45,47 @@ def run(self): if max_error_i > max_error: max_error = max_error_i - # benchmark pytorch + # benchmark pytorch throughput + torch.cuda.current_stream().synchronize() t0 = time.time() for i in range(50): outputs = module(*inputs) - torch.cuda.current_stream().synchronize() + torch.cuda.current_stream().synchronize() t1 = time.time() fps = 50.0 / (t1 - t0) - # benchmark tensorrt + # benchmark tensorrt throughput + torch.cuda.current_stream().synchronize() t0 = time.time() for i in range(50): outputs = module_trt(*inputs) - torch.cuda.current_stream().synchronize() + torch.cuda.current_stream().synchronize() t1 = time.time() fps_trt = 50.0 / (t1 - t0) - return max_error, fps, fps_trt + # benchmark pytorch latency + torch.cuda.current_stream().synchronize() + t0 = time.time() + for i in range(50): + outputs = module(*inputs) + torch.cuda.current_stream().synchronize() + t1 = time.time() + + ms = 1000.0 * (t1 - t0) / 50.0 + + # benchmark tensorrt latency + torch.cuda.current_stream().synchronize() + t0 = time.time() + for i in range(50): + outputs = module_trt(*inputs) + torch.cuda.current_stream().synchronize() + t1 = time.time() + + ms_trt = 1000.0 * (t1 - t0) / 50.0 + + return max_error, fps, fps_trt, ms, ms_trt MODULE_TESTS = [ @@ -105,10 +127,10 @@ def run(self): continue # run test - max_error, fps, fps_trt = test.run() + max_error, fps, fps_trt, ms, ms_trt = test.run() # write entry - line = '| %s | %s | %s | %s | %.2E | %.3g | %.3g |' % (name, test.dtype.__repr__().split('.')[-1], str(test.input_shapes), str(test.torch2trt_kwargs), max_error, fps, fps_trt) + line = '| %s | %s | %s | %s | %.2E | %.3g | %.3g | %.3g | %.3g |' % (name, test.dtype.__repr__().split('.')[-1], str(test.input_shapes), str(test.torch2trt_kwargs), max_error, fps, fps_trt, ms, ms_trt) print(line) with open(args.output, 'a+') as f: f.write(line + '\n') \ No newline at end of file From a9fb7585fae0b8e2d5f6fce8f6dcc7e73869531f Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 11:38:21 -0400 Subject: [PATCH 002/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b36cb075..2d34eb1e 100644 --- a/README.md +++ b/README.md @@ 
-39,7 +39,7 @@ print(torch.max(torch.abs(y - y_trt))) | Model | PyTorch FP16 (Jetson Nano) | TensorRT FP16 (Jetson Nano) | |-------|--------------|-----------------| -| alexnet | 18.3s | 13.2 | +| alexnet | 0.018 | 0.013 | | squeezenet1_0 | 0.021 | 0.008 | | squeezenet1_1 | | | | resnet18 | | | From 96cc53f0c7ea6a61a24f241c0088390d52abcd94 Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 11:39:56 -0400 Subject: [PATCH 003/355] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 2d34eb1e..be605b4e 100644 --- a/README.md +++ b/README.md @@ -39,8 +39,8 @@ print(torch.max(torch.abs(y - y_trt))) | Model | PyTorch FP16 (Jetson Nano) | TensorRT FP16 (Jetson Nano) | |-------|--------------|-----------------| -| alexnet | 0.018 | 0.013 | -| squeezenet1_0 | 0.021 | 0.008 | +| alexnet | 18ms | 13ms | +| squeezenet1_0 | 21ms | 8.4ms | | squeezenet1_1 | | | | resnet18 | | | | resnet50 | | | From 35450c64a57652b960c49b2f4a976a9efd7d9417 Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 11:42:54 -0400 Subject: [PATCH 004/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index be605b4e..0bf4c787 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ print(torch.max(torch.abs(y - y_trt))) |-------|--------------|-----------------| | alexnet | 18ms | 13ms | | squeezenet1_0 | 21ms | 8.4ms | -| squeezenet1_1 | | | +| squeezenet1_1 | 16ms | 5.5ms | | resnet18 | | | | resnet50 | | | | resnet101 | | | From 373a4e119a7b7db8f779fa40fc7a0f41b6fdcaac Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 12:48:57 -0400 Subject: [PATCH 005/355] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0bf4c787..daf26f63 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # torch2trt - A PyTorch -> TensorRT Converter -This is an experimental PyTorch to TensorRT converter which utilizes the -TensorRT Python API. We've found it useful in some examples, but it is +This is PyTorch to TensorRT converter which utilizes the +TensorRT Python API. It's very easy to use. We've found it useful in some examples ([see below](#Tested-models)), but it is not comprehensive. ### Setup From 3f5bbc64e845856141233cf09bdfca755aa37dfe Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 12:52:19 -0400 Subject: [PATCH 006/355] Update README.md --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index daf26f63..44050ec3 100644 --- a/README.md +++ b/README.md @@ -37,12 +37,14 @@ print(torch.max(torch.abs(y - y_trt))) ### Tested models +Below are models that we benchmarked on NVIDIA Jetson Nano. Timing just includes model execution (not data copy). + | Model | PyTorch FP16 (Jetson Nano) | TensorRT FP16 (Jetson Nano) | |-------|--------------|-----------------| | alexnet | 18ms | 13ms | | squeezenet1_0 | 21ms | 8.4ms | | squeezenet1_1 | 16ms | 5.5ms | -| resnet18 | | | +| resnet18 | 29ms | 13ms | | resnet50 | | | | resnet101 | | | | resnet152 | | | From 5a04ea4c98e18b01da29709dd9e197cd801bb7d8 Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 12:55:49 -0400 Subject: [PATCH 007/355] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 44050ec3..9988331d 100644 --- a/README.md +++ b/README.md @@ -45,6 +45,7 @@ Below are models that we benchmarked on NVIDIA Jetson Nano. 
Timing just include | squeezenet1_0 | 21ms | 8.4ms | | squeezenet1_1 | 16ms | 5.5ms | | resnet18 | 29ms | 13ms | +| resnet34 | 55ms | 23ms | | resnet50 | | | | resnet101 | | | | resnet152 | | | From e3b348d1cd1f267211d1732b96e1355a85746d41 Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 12:59:19 -0400 Subject: [PATCH 008/355] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 9988331d..a767823f 100644 --- a/README.md +++ b/README.md @@ -44,8 +44,8 @@ Below are models that we benchmarked on NVIDIA Jetson Nano. Timing just include | alexnet | 18ms | 13ms | | squeezenet1_0 | 21ms | 8.4ms | | squeezenet1_1 | 16ms | 5.5ms | -| resnet18 | 29ms | 13ms | -| resnet34 | 55ms | 23ms | +| resnet18 | | 13ms | +| resnet34 | | 23ms | | resnet50 | | | | resnet101 | | | | resnet152 | | | From fba88cdb446a898a12da15d4773de0bc9d0c8daf Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 13:48:43 -0400 Subject: [PATCH 009/355] Update README.md --- README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a767823f..c6adcee6 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,12 @@ # torch2trt - A PyTorch -> TensorRT Converter This is PyTorch to TensorRT converter which utilizes the -TensorRT Python API. It's very easy to use. We've found it useful in some examples ([see below](#Tested-models)), but it is -not comprehensive. +TensorRT Python API. The goals of the converter are + +* Easy to use - Convert models with a single function call ``torch2trt`` +* Easy to extend - Write your own layer converter in Python and register it with ``@tensorrt_converter`` + +If you find an issue or write your own layer converter, please [let us know](../..//issues)! ### Setup From 30137403aa19c811ed6f90643f9afb34c2c6fd6b Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 14:23:51 -0400 Subject: [PATCH 010/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c6adcee6..1c0b2206 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,7 @@ Below are models that we benchmarked on NVIDIA Jetson Nano. Timing just include | resnet18 | | 13ms | | resnet34 | | 23ms | | resnet50 | | | -| resnet101 | | | +| resnet101 | 135ms | 62ms | | resnet152 | | | | densenet121 | | | | densenet169 | | | From c5db74f76e8dc68842b90c4572e604839fc1148b Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 14:45:18 -0400 Subject: [PATCH 011/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1c0b2206..205ac608 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ Below are models that we benchmarked on NVIDIA Jetson Nano. Timing just include | squeezenet1_1 | 16ms | 5.5ms | | resnet18 | | 13ms | | resnet34 | | 23ms | -| resnet50 | | | +| resnet50 | 77ms | 38ms | | resnet101 | 135ms | 62ms | | resnet152 | | | | densenet121 | | | From 780de05d14dea812c2f1af9be4ede468faea1282 Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 15:03:39 -0400 Subject: [PATCH 012/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 205ac608..cdd66ebb 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ Below are models that we benchmarked on NVIDIA Jetson Nano. 
Timing just include | resnet34 | | 23ms | | resnet50 | 77ms | 38ms | | resnet101 | 135ms | 62ms | -| resnet152 | | | +| resnet152 | 200ms | 93ms | | densenet121 | | | | densenet169 | | | | densenet201 | | | From 344bcbe7315c6da89ac54f465e9d9cfe927728ad Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 15:24:18 -0400 Subject: [PATCH 013/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index cdd66ebb..7ded697a 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ Below are models that we benchmarked on NVIDIA Jetson Nano. Timing just include | resnet50 | 77ms | 38ms | | resnet101 | 135ms | 62ms | | resnet152 | 200ms | 93ms | -| densenet121 | | | +| densenet121 | 83ms | 46ms | | densenet169 | | | | densenet201 | | | | densenet161 | | | From 215866eb2f661f47e5bd6cea2ac09b00c3e198bd Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 15:33:58 -0400 Subject: [PATCH 014/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7ded697a..09491aff 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ Below are models that we benchmarked on NVIDIA Jetson Nano. Timing just include | resnet101 | 135ms | 62ms | | resnet152 | 200ms | 93ms | | densenet121 | 83ms | 46ms | -| densenet169 | | | +| densenet169 | 116ms | 58ms | | densenet201 | | | | densenet161 | | | | vgg11 | | | From c76c3967a4a43089240486d889ff52afd0d04474 Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 15:43:28 -0400 Subject: [PATCH 015/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 09491aff..5c9abd4c 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # torch2trt - A PyTorch -> TensorRT Converter -This is PyTorch to TensorRT converter which utilizes the +torch2trt is a PyTorch to TensorRT converter which utilizes the TensorRT Python API. The goals of the converter are * Easy to use - Convert models with a single function call ``torch2trt`` From 667f659ba86da42ab9fc06737aa8b8f79f8c2e5d Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 15:46:10 -0400 Subject: [PATCH 016/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5c9abd4c..65a8b89e 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ Below are models that we benchmarked on NVIDIA Jetson Nano. Timing just include | resnet152 | 200ms | 93ms | | densenet121 | 83ms | 46ms | | densenet169 | 116ms | 58ms | -| densenet201 | | | +| densenet201 | 139ms | 75ms | | densenet161 | | | | vgg11 | | | | vgg13 | | | From c537aff34bd281e468d97f45537e6601da4f1dd8 Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 15:59:24 -0400 Subject: [PATCH 017/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 65a8b89e..902669dc 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ Below are models that we benchmarked on NVIDIA Jetson Nano. 
Timing just include | densenet121 | 83ms | 46ms | | densenet169 | 116ms | 58ms | | densenet201 | 139ms | 75ms | -| densenet161 | | | +| densenet161 | 209ms | 97ms | | vgg11 | | | | vgg13 | | | | vgg16 | | | From 732e4c6ac38f01fddb5a781f939cf70c3eb1424f Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 16:05:48 -0400 Subject: [PATCH 018/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 902669dc..97ed2c7d 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ Below are models that we benchmarked on NVIDIA Jetson Nano. Timing just include | alexnet | 18ms | 13ms | | squeezenet1_0 | 21ms | 8.4ms | | squeezenet1_1 | 16ms | 5.5ms | -| resnet18 | | 13ms | +| resnet18 | 32ms | 11ms | | resnet34 | | 23ms | | resnet50 | 77ms | 38ms | | resnet101 | 135ms | 62ms | From 98863ea49c1f7c5dc126a396b29414516ececf79 Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 16:08:54 -0400 Subject: [PATCH 019/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 97ed2c7d..0a426561 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,7 @@ Below are models that we benchmarked on NVIDIA Jetson Nano. Timing just include | squeezenet1_0 | 21ms | 8.4ms | | squeezenet1_1 | 16ms | 5.5ms | | resnet18 | 32ms | 11ms | -| resnet34 | | 23ms | +| resnet34 | 58ms | 21ms | | resnet50 | 77ms | 38ms | | resnet101 | 135ms | 62ms | | resnet152 | 200ms | 93ms | From 8862cc2efb2c11c13246b28a161507efbe2d5983 Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 16:10:58 -0400 Subject: [PATCH 020/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0a426561..f0682b21 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ TensorRT Python API. The goals of the converter are * Easy to use - Convert models with a single function call ``torch2trt`` * Easy to extend - Write your own layer converter in Python and register it with ``@tensorrt_converter`` -If you find an issue or write your own layer converter, please [let us know](../..//issues)! +If you find, please [let us know](../..//issues)! We'd also love to hear if you add your own layer converter. It may be helpful to others. ### Setup From d53a41576c00c9a9f2a1727cac3e7844f746a3ab Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 16:12:40 -0400 Subject: [PATCH 021/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f0682b21..1a44425e 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ TensorRT Python API. The goals of the converter are * Easy to use - Convert models with a single function call ``torch2trt`` * Easy to extend - Write your own layer converter in Python and register it with ``@tensorrt_converter`` -If you find, please [let us know](../..//issues)! We'd also love to hear if you add your own layer converter. It may be helpful to others. +If you find an issue, please [let us know](../..//issues)! We'd also love to hear if you add your own layer converter. It may be helpful to others. 
### Setup From ad0aec7544f8f314a68c1a23377b7985c28a0216 Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 16:13:10 -0400 Subject: [PATCH 022/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1a44425e..84552d6d 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ TensorRT Python API. The goals of the converter are * Easy to use - Convert models with a single function call ``torch2trt`` * Easy to extend - Write your own layer converter in Python and register it with ``@tensorrt_converter`` -If you find an issue, please [let us know](../..//issues)! We'd also love to hear if you add your own layer converter. It may be helpful to others. +If you find an issue, please [let us know](../..//issues)! We'd also love to hear if you add your own ``@tensorrt_converter``. It may be helpful to others. ### Setup From a0e19b5b921af9bffc3cdfcfb24f0cdb3dfb6600 Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 16:13:42 -0400 Subject: [PATCH 023/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 84552d6d..f8ab1994 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ TensorRT Python API. The goals of the converter are * Easy to use - Convert models with a single function call ``torch2trt`` * Easy to extend - Write your own layer converter in Python and register it with ``@tensorrt_converter`` -If you find an issue, please [let us know](../..//issues)! We'd also love to hear if you add your own ``@tensorrt_converter``. It may be helpful to others. +If you find an issue, please [let us know](../..//issues)! We'd also love to hear if create your own ``@tensorrt_converter``. It may be helpful to others. ### Setup From 176a9995f47b9e1c3863aaf5680ce1e07898ab9f Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 16:14:45 -0400 Subject: [PATCH 024/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f8ab1994..23172882 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ Below are models that we benchmarked on NVIDIA Jetson Nano. Timing just include | densenet169 | 116ms | 58ms | | densenet201 | 139ms | 75ms | | densenet161 | 209ms | 97ms | -| vgg11 | | | +| vgg11 | 61ms | 17ms | | vgg13 | | | | vgg16 | | | | vgg19 | | | From 371cda1d166468c8080045889c36f69845830177 Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 16:20:26 -0400 Subject: [PATCH 025/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 23172882..da3f294e 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,7 @@ Below are models that we benchmarked on NVIDIA Jetson Nano. Timing just include | densenet201 | 139ms | 75ms | | densenet161 | 209ms | 97ms | | vgg11 | 61ms | 17ms | -| vgg13 | | | +| vgg13 | 96ms | 33ms | | vgg16 | | | | vgg19 | | | | vgg11_bn | | | From 5865fe9b33566d40709d0c1b2a024d538ec8e9c0 Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 16:27:30 -0400 Subject: [PATCH 026/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index da3f294e..d0c6007e 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ Below are models that we benchmarked on NVIDIA Jetson Nano. 
Timing just include | densenet161 | 209ms | 97ms | | vgg11 | 61ms | 17ms | | vgg13 | 96ms | 33ms | -| vgg16 | | | +| vgg16 | 137ms | 44ms | | vgg19 | | | | vgg11_bn | | | | vgg13_bn | | | From 46d8c7a3524d0ce925c1375dd0d8d3927b1f8217 Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 16:47:11 -0400 Subject: [PATCH 027/355] Update README.md --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index d0c6007e..3bdec141 100644 --- a/README.md +++ b/README.md @@ -98,3 +98,10 @@ the following may be unexpected. Please see the ``torch2trt.py`` module for more examples. + +### A comment on variable size tensors + +In case you're unfamilar, TensorRT currently does not support variable size Tensors. While this may seem +limiting, it can actually be a good constraint when designing your model for use in embedded systems. By +restricting to a fixed input size, we can expect similar memory usage and runtime. Ultimately, even if +TensorRT didn't have this constraint, you'd probably want to have it anyways :) From d7cd596e1da90cc399df866cecf3873106d265fe Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 16:49:54 -0400 Subject: [PATCH 028/355] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 3bdec141..9c0116bb 100644 --- a/README.md +++ b/README.md @@ -101,7 +101,8 @@ Please see the ``torch2trt.py`` module for more examples. ### A comment on variable size tensors -In case you're unfamilar, TensorRT currently does not support variable size Tensors. While this may seem +TensorRT currently does not support variable size Tensors, so whatever input shape you use when converting, you must use +when executing. While this may seem limiting, it can actually be a good constraint when designing your model for use in embedded systems. By restricting to a fixed input size, we can expect similar memory usage and runtime. Ultimately, even if TensorRT didn't have this constraint, you'd probably want to have it anyways :) From b4f2d972588126697eb690392ccef7d0938de151 Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 16:54:13 -0400 Subject: [PATCH 029/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9c0116bb..eabfbd93 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ TensorRT Python API. The goals of the converter are * Easy to use - Convert models with a single function call ``torch2trt`` * Easy to extend - Write your own layer converter in Python and register it with ``@tensorrt_converter`` -If you find an issue, please [let us know](../..//issues)! We'd also love to hear if create your own ``@tensorrt_converter``. It may be helpful to others. +If you find an issue, please [let us know](../..//issues)! We'd also love to hear if you create your own ``@tensorrt_converter``. It may be helpful to others. ### Setup From 103d26939f4eb929f56b474e29ecb6d19c478937 Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 16:57:03 -0400 Subject: [PATCH 030/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index eabfbd93..cdcc4346 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # torch2trt - A PyTorch -> TensorRT Converter torch2trt is a PyTorch to TensorRT converter which utilizes the -TensorRT Python API. The goals of the converter are +TensorRT Python API. 
The converter is * Easy to use - Convert models with a single function call ``torch2trt`` * Easy to extend - Write your own layer converter in Python and register it with ``@tensorrt_converter`` From cc4ffbc146eead733014cdbd74b0ead6f3177ba2 Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 17:28:07 -0400 Subject: [PATCH 031/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index cdcc4346..12177700 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ Below are models that we benchmarked on NVIDIA Jetson Nano. Timing just include |-------|--------------|-----------------| | alexnet | 18ms | 13ms | | squeezenet1_0 | 21ms | 8.4ms | -| squeezenet1_1 | 16ms | 5.5ms | +| squeezenet1_1 | 13 | 4.7ms | | resnet18 | 32ms | 11ms | | resnet34 | 58ms | 21ms | | resnet50 | 77ms | 38ms | From f48bd4e4668f1c8330843d0008262d3e892f420b Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sat, 27 Apr 2019 15:13:58 -0700 Subject: [PATCH 032/355] example --- bug.ipynb | 93 ------------------------------------------------ example.ipynb | 98 +++++++++++++++++++++++---------------------------- 2 files changed, 44 insertions(+), 147 deletions(-) delete mode 100644 bug.ipynb diff --git a/bug.ipynb b/bug.ipynb deleted file mode 100644 index a74a3619..00000000 --- a/bug.ipynb +++ /dev/null @@ -1,93 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import torchvision.models\n", - "import torch\n", - "import tensorrt as trt\n", - "from torch2trt import torch2trt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model = torchvision.models.resnet18(pretrained=True).cuda().eval().half()\n", - "\n", - "input = torch.ones((1, 3, 224, 224)).cuda().half()\n", - "\n", - "model_trt = torch2trt(model, [input], fp16_mode=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import time\n", - "input = input.half()\n", - "\n", - "model = model.half()\n", - "output = model(input)\n", - "print(output.flatten()[0:10])\n", - "\n", - "t0 = time.time()\n", - "with torch.no_grad():\n", - " for i in range(50):\n", - " output = model(input)\n", - " #output = model_trt(input)\n", - "t1 = time.time()\n", - "\n", - "print((t1 - t0) / 50.0)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import time\n", - "input = input.float()\n", - "output = model_trt(input)\n", - "print(output.flatten()[0:10])\n", - "\n", - "t0 = time.time()\n", - "with torch.no_grad():\n", - " for i in range(50):\n", - " output = model_trt(input)\n", - "t1 = time.time()\n", - "\n", - "print((t1 - t0) / 50.0)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.7" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/example.ipynb b/example.ipynb index e54775c4..8b87aa90 100644 --- a/example.ipynb +++ b/example.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 31, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -14,88 +14,78 @@ }, { "cell_type": 
"code", - "execution_count": 23, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ - "def cross_validate(model, model_trt, data, tolerance=0.01):\n", - " y = model(data)\n", - " y_trt = model_trt(data)\n", - " max_error = torch.max(torch.abs(y - y_trt))\n", - " if max_error > tolerance:\n", - " raise RuntimeError('Model exceeds tolerance')" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "net = torch.nn.Sequential(*[\n", - " torch.nn.Conv2d(3, 32, 3),\n", - " torch.nn.ReLU(),\n", - " torch.nn.BatchNorm2d(32)\n", - "]).eval().half().cuda()\n", + "model = torchvision.models.squeezenet1_1(pretrained=True).cuda().eval().half()\n", "\n", - "data = torch.randn((1, 3, 224, 224)).cuda().half()" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [], - "source": [ - "net_trt = torch2trt(net, [data], fp16_mode=True)" + "input = torch.ones((1, 3, 224, 224)).cuda().half()\n", + "\n", + "model_trt = torch2trt(model, [input], fp16_mode=True)" ] }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Help on instancemethod in module tensorrt.tensorrt:\n", - "\n", - "mark_output(...)\n", - " mark_output(self: tensorrt.tensorrt.INetworkDefinition, tensor: tensorrt.tensorrt.ITensor) -> None\n", - " \n", - " \n", - " Mark a tensor as an output.\n", - " \n", - " :arg tensor: The tensor to mark.\n", - "\n" + "tensor([1.1484, 0.7612, 0.7847, 0.3052, 0.3586, 2.0605, 0.0000, 0.7583, 0.0067,\n", + " 1.8486], device='cuda:0', dtype=torch.float16, grad_fn=)\n", + "0.013431086540222167\n" ] } ], "source": [ - "help(trt.INetworkDefinition.mark_output)" + "import time\n", + "input = input.half()\n", + "\n", + "model = model.half()\n", + "output = model(input)\n", + "print(output.flatten()[0:10])\n", + "\n", + "t0 = time.time()\n", + "with torch.no_grad():\n", + " for i in range(50):\n", + " output = model(input)\n", + " #output = model_trt(input)\n", + "t1 = time.time()\n", + "\n", + "print((t1 - t0) / 50.0)" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 11, "metadata": {}, "outputs": [ { - "ename": "RuntimeError", - "evalue": "expected type torch.cuda.FloatTensor but got torch.cuda.HalfTensor", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mcross_validate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnet\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnet_trt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m\u001b[0m in \u001b[0;36mcross_validate\u001b[0;34m(model, model_trt, data, tolerance)\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0my_trt\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel_trt\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mmax_error\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mabs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0my_trt\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmax_error\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0mtolerance\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mRuntimeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Model exceeds tolerance'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mRuntimeError\u001b[0m: expected type torch.cuda.FloatTensor but got torch.cuda.HalfTensor" + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor([1.3467, 0.7876, 0.5308, 0.4924, 1.0488, 0.8022, 0.2668, 3.1426, 0.8687,\n", + " 3.0195], device='cuda:0', dtype=torch.float16)\n", + "0.0047100019454956055\n" ] } ], "source": [ - "cross_validate(net, net_trt, data)" + "import time\n", + "input = input.float()\n", + "output = model_trt(input)\n", + "print(output.flatten()[0:10])\n", + "\n", + "t0 = time.time()\n", + "with torch.no_grad():\n", + " for i in range(50):\n", + " output = model_trt(input)\n", + "t1 = time.time()\n", + "\n", + "print((t1 - t0) / 50.0)" ] } ], From c3b6a6f9798a8b40069ae9eb3c6b2df0df1491de Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 20:36:04 -0400 Subject: [PATCH 033/355] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 12177700..c1ae6fcc 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,7 @@ Below are models that we benchmarked on NVIDIA Jetson Nano. Timing just include | vgg13_bn | | | | vgg16_bn | | | | vgg19_bn | | | +| [mobilenet_v2](https://github.com/tonylins/pytorch-mobilenet-v2) | | | ### Add (or override) a converter From 5b74d1c5a989105f9bd8578a0bd25edaad4d1aae Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 20:39:05 -0400 Subject: [PATCH 034/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c1ae6fcc..43f4caac 100644 --- a/README.md +++ b/README.md @@ -65,7 +65,7 @@ Below are models that we benchmarked on NVIDIA Jetson Nano. Timing just include | vgg13_bn | | | | vgg16_bn | | | | vgg19_bn | | | -| [mobilenet_v2](https://github.com/tonylins/pytorch-mobilenet-v2) | | | +| [mobilenet_v2](https://github.com/tonylins/pytorch-mobilenet-v2) | 27ms | 16ms | ### Add (or override) a converter From 77526c1722f06ff073cf81353ec9f4eed6893eee Mon Sep 17 00:00:00 2001 From: John Date: Sat, 27 Apr 2019 21:33:40 -0400 Subject: [PATCH 035/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 43f4caac..7dadd293 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ Below are models that we benchmarked on NVIDIA Jetson Nano. 
Timing just include |-------|--------------|-----------------| | alexnet | 18ms | 13ms | | squeezenet1_0 | 21ms | 8.4ms | -| squeezenet1_1 | 13 | 4.7ms | +| squeezenet1_1 | 13ms | 4.7ms | | resnet18 | 32ms | 11ms | | resnet34 | 58ms | 21ms | | resnet50 | 77ms | 38ms | From 442f5e08e1755af57dfd6ae32bea1416733a40ca Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sun, 28 Apr 2019 11:30:52 -0700 Subject: [PATCH 036/355] removed test --- example.ipynb | 45 ++++++++++++++++----------------------------- test.py | 41 ----------------------------------------- test.sh | 22 ---------------------- 3 files changed, 16 insertions(+), 92 deletions(-) delete mode 100644 test.py delete mode 100644 test.sh diff --git a/example.ipynb b/example.ipynb index 8b87aa90..0e2a4f8a 100644 --- a/example.ipynb +++ b/example.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -14,32 +14,29 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "model = torchvision.models.squeezenet1_1(pretrained=True).cuda().eval().half()\n", - "\n", - "input = torch.ones((1, 3, 224, 224)).cuda().half()\n", + "model = torchvision.models.resnet18(pretrained=True).cuda().eval()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "input = torch.ones((1, 3, 224, 224)).cuda()\n", "\n", "model_trt = torch2trt(model, [input], fp16_mode=True)" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "tensor([1.1484, 0.7612, 0.7847, 0.3052, 0.3586, 2.0605, 0.0000, 0.7583, 0.0067,\n", - " 1.8486], device='cuda:0', dtype=torch.float16, grad_fn=)\n", - "0.013431086540222167\n" - ] - } - ], + "outputs": [], "source": [ "import time\n", "input = input.half()\n", @@ -60,19 +57,9 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "tensor([1.3467, 0.7876, 0.5308, 0.4924, 1.0488, 0.8022, 0.2668, 3.1426, 0.8687,\n", - " 3.0195], device='cuda:0', dtype=torch.float16)\n", - "0.0047100019454956055\n" - ] - } - ], + "outputs": [], "source": [ "import time\n", "input = input.float()\n", diff --git a/test.py b/test.py deleted file mode 100644 index b6c1f8fd..00000000 --- a/test.py +++ /dev/null @@ -1,41 +0,0 @@ -import torch -import argparse -import torchvision.models -from torch2trt import torch2trt -import time - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('model') - args = parser.parse_args() - - input = torch.randn((1, 3, 224, 224)).cuda().half() - - with torch.no_grad(): - model = getattr(torchvision.models, str(args.model))(pretrained=True).cuda().half().eval() - model_trt = torch2trt(model, [input], fp16_mode=True) - - # run pytorch - output = model(input) - t0 = time.time() - for i in range(100): - output = model(input) - t1 = time.time() - - dt_pytorch = (t1 - t0) / 100.0 - - output = model_trt(input) - t0 = time.time() - for i in range(100): - output = model_trt(input) - t1 = time.time() - - dt_tensorrt = (t1 - t0) / 100.0 - - line = '%s\t%f\t%f' % (args.model, dt_pytorch, dt_tensorrt) - - print(line) - - with open('timings.txt', 'a') as f: - f.write(line + '\n') \ No newline at end of file diff --git a/test.sh b/test.sh deleted file 
mode 100644 index 9ef3a9e8..00000000 --- a/test.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/sh - -python3 test.py alexnet -python3 test.py squeezenet1_0 -python3 test.py squeezenet1_1 -python3 test.py resnet18 -python3 test.py resnet34 -python3 test.py resnet50 -python3 test.py resnet101 -python3 test.py resnet152 -python3 test.py densenet121 -python3 test.py densenet169 -python3 test.py densenet201 -python3 test.py densenet161 -python3 test.py vgg11 -python3 test.py vgg13 -python3 test.py vgg16 -python3 test.py vgg19 -python3 test.py vgg11_bn -python3 test.py vgg13_bn -python3 test.py vgg16_bn -python3 test.py vgg19_bn \ No newline at end of file From b4fdbf41b2b2f69000de0a82479cc156030dbded Mon Sep 17 00:00:00 2001 From: John Date: Sun, 28 Apr 2019 14:43:00 -0400 Subject: [PATCH 037/355] Update README.md --- README.md | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 7dadd293..558e7a0d 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,18 @@ Below are models that we benchmarked on NVIDIA Jetson Nano. Timing just include | [mobilenet_v2](https://github.com/tonylins/pytorch-mobilenet-v2) | 27ms | 16ms | -### Add (or override) a converter +### How does it work? + +This converter works by attaching conversion functions (like ``convert_ReLU``) to the original +PyTorch functional calls (like ``torch.nn.ReLU.forward``). The sample input data is passed +through the network, just as before, except now whenever a registered function (``torch.nn.ReLU.forward``) +is encountered, the corresponding converter (``convert_ReLU``) is called. The converter +is passed some information, like the arguments to the original PyTorch function, the TensorRT +network that is currently being constructed, and a dictionary of TensorRT tensors that have been +added. We call this collection of information the ``context`` in which the converter is called. +You can use it to extend TensorRT network. + +### How to add (or override) a converter Here we show how to add an example converter using the TensorRT python API. From 65055d9f008b5e72af9b467bfe1dfa284ac26b03 Mon Sep 17 00:00:00 2001 From: John Date: Sun, 28 Apr 2019 14:45:20 -0400 Subject: [PATCH 038/355] Update README.md --- README.md | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 558e7a0d..155d763f 100644 --- a/README.md +++ b/README.md @@ -75,9 +75,9 @@ PyTorch functional calls (like ``torch.nn.ReLU.forward``). The sample input dat through the network, just as before, except now whenever a registered function (``torch.nn.ReLU.forward``) is encountered, the corresponding converter (``convert_ReLU``) is called. The converter is passed some information, like the arguments to the original PyTorch function, the TensorRT -network that is currently being constructed, and a dictionary of TensorRT tensors that have been -added. We call this collection of information the ``context`` in which the converter is called. -You can use it to extend TensorRT network. +network that is currently being constructed, and a dictionary of TensorRT tensors that have already been +added. The converter then uses this information to add layers to the TensorRT network, and +add any new TensorRT tensors to the growing dictionary. ### How to add (or override) a converter @@ -100,11 +100,12 @@ def convert_ReLU(ctx): The converter takes one argument, a ``ConversionContext``, which will contain the following -* network: The TensorRT network that is being constructed. 
-* method_args: Positional arguments that were passed to the specified Torch function. -* method_kwargs: Keyword arguments that were passed to the specified Torch function. -* method_return: The value returned by the specified Torch function. -* trt_tensors: A dictionary mapping Torch tensors (by hash value) to TensorRT tensors. The +* network - The TensorRT network that is being constructed. + +* method_args - Positional arguments that were passed to the specified Torch function. +* method_kwargs - Keyword arguments that were passed to the specified Torch function. +* method_return - The value returned by the specified Torch function. +* trt_tensors - A dictionary mapping Torch tensors (by hash value) to TensorRT tensors. The converter must the set values for any output Tensors. Otherwise, if a later function uses the Torch tensor, and there is not an associated TensorRT tensor in the map, results may be unexpected. From 839e68b6a1a1244986b8ab9cbed3fa5d2d9e7f0f Mon Sep 17 00:00:00 2001 From: John Date: Sun, 28 Apr 2019 14:46:46 -0400 Subject: [PATCH 039/355] Update README.md --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 155d763f..1942a160 100644 --- a/README.md +++ b/README.md @@ -102,12 +102,12 @@ the following * network - The TensorRT network that is being constructed. -* method_args - Positional arguments that were passed to the specified Torch function. -* method_kwargs - Keyword arguments that were passed to the specified Torch function. -* method_return - The value returned by the specified Torch function. -* trt_tensors - A dictionary mapping Torch tensors (by hash value) to TensorRT tensors. The +* ``ctx.method_args`` - Positional arguments that were passed to the specified PyTorch function. +* ``ctx.method_kwargs`` - Keyword arguments that were passed to the specified PyTorch function. +* ``ctx.method_return`` - The value returned by the specified PyTorch function. +* ``ctx.trt_tensors`` - A dictionary mapping PyTorch tensors (by hash value) to TensorRT tensors. The converter must the set values for any output Tensors. Otherwise, if a later function uses - the Torch tensor, and there is not an associated TensorRT tensor in the map, results + the PyTorch tensor, and there is not an associated TensorRT tensor in the map, results may be unexpected. Please see the ``torch2trt.py`` module for more examples. From 79a0d81d6cdbdbdc39e48d1d85cad5da4857d5d3 Mon Sep 17 00:00:00 2001 From: John Date: Sun, 28 Apr 2019 14:47:15 -0400 Subject: [PATCH 040/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1942a160..812506ee 100644 --- a/README.md +++ b/README.md @@ -100,7 +100,7 @@ def convert_ReLU(ctx): The converter takes one argument, a ``ConversionContext``, which will contain the following -* network - The TensorRT network that is being constructed. +* ``ctx.network`` - The TensorRT network that is being constructed. * ``ctx.method_args`` - Positional arguments that were passed to the specified PyTorch function. * ``ctx.method_kwargs`` - Keyword arguments that were passed to the specified PyTorch function. 
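As a worked illustration of the ``ConversionContext`` fields documented in the patch above: a converter for ``torch.sigmoid`` might look like the sketch below. It follows the dictionary-based ``ctx.trt_tensors`` convention the codebase uses at this point in the series (patch 046 later replaces it with a ``_trt`` attribute); the ``torch.sigmoid`` mapping itself is illustrative and is not part of these patches.

```python
# Illustrative sketch only -- not part of the patch series. Assumes the
# dict-based ctx.trt_tensors API documented in the README patches above.
import tensorrt as trt
from torch2trt import tensorrt_converter

@tensorrt_converter('torch.sigmoid')
def convert_sigmoid(ctx):
    input = ctx.method_args[0]   # the tensor passed to torch.sigmoid
    output = ctx.method_return   # the tensor torch.sigmoid returned
    trt_input = ctx.trt_tensors[input.__hash__()]
    layer = ctx.network.add_activation(
        input=trt_input, type=trt.ActivationType.SIGMOID)
    ctx.trt_tensors[output.__hash__()] = layer.get_output(0)
```

Registered this way, any call to ``torch.sigmoid`` during the sample forward pass would add a matching activation layer to the TensorRT network being constructed.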
From 65b09110f9021e4e237d8a48d24deaf067ffe2d7 Mon Sep 17 00:00:00 2001 From: John Date: Sun, 28 Apr 2019 14:47:55 -0400 Subject: [PATCH 041/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 812506ee..186fd4e2 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,7 @@ Below are models that we benchmarked on NVIDIA Jetson Nano. Timing just include This converter works by attaching conversion functions (like ``convert_ReLU``) to the original PyTorch functional calls (like ``torch.nn.ReLU.forward``). The sample input data is passed through the network, just as before, except now whenever a registered function (``torch.nn.ReLU.forward``) -is encountered, the corresponding converter (``convert_ReLU``) is called. The converter +is encountered, the corresponding converter (``convert_ReLU``) is also called afterwards. The converter is passed some information, like the arguments to the original PyTorch function, the TensorRT network that is currently being constructed, and a dictionary of TensorRT tensors that have already been added. The converter then uses this information to add layers to the TensorRT network, and From ab1842472c86ff183cbb8ad5d81b534dc1a1a718 Mon Sep 17 00:00:00 2001 From: John Date: Sun, 28 Apr 2019 14:48:54 -0400 Subject: [PATCH 042/355] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 186fd4e2..3c88435c 100644 --- a/README.md +++ b/README.md @@ -75,8 +75,8 @@ PyTorch functional calls (like ``torch.nn.ReLU.forward``). The sample input dat through the network, just as before, except now whenever a registered function (``torch.nn.ReLU.forward``) is encountered, the corresponding converter (``convert_ReLU``) is also called afterwards. The converter is passed some information, like the arguments to the original PyTorch function, the TensorRT -network that is currently being constructed, and a dictionary of TensorRT tensors that have already been -added. The converter then uses this information to add layers to the TensorRT network, and +network that is currently being constructed, and a dictionary mapping of PyTorch->TensorRT tensors that have already been +added to the TensorRT network. The converter then uses this information to add layers to the TensorRT network, and add any new TensorRT tensors to the growing dictionary. ### How to add (or override) a converter From 9f829da025f653c97bbdbab5dfb3a2c8dcc42ef8 Mon Sep 17 00:00:00 2001 From: John Date: Sun, 28 Apr 2019 14:49:33 -0400 Subject: [PATCH 043/355] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 3c88435c..5ff3cba3 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ torch2trt is a PyTorch to TensorRT converter which utilizes the TensorRT Python API. The converter is * Easy to use - Convert models with a single function call ``torch2trt`` + * Easy to extend - Write your own layer converter in Python and register it with ``@tensorrt_converter`` If you find an issue, please [let us know](../..//issues)! We'd also love to hear if you create your own ``@tensorrt_converter``. It may be helpful to others. 
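The "How does it work?" passage refined in patches 037-042 says the converter "is also called afterwards" whenever a registered PyTorch call runs. Mechanically, that behavior amounts to a method wrapper along the lines of the sketch below -- a simplified reconstruction from the ``ConversionContext`` fields shown in this series, not a verbatim excerpt of ``torch2trt.py`` (the name ``attach_converter`` is assumed).

```python
# Simplified reconstruction of the hooking mechanism the README describes.
# attach_converter is an assumed name; see torch2trt.py for the real code.
def attach_converter(ctx, method, converter):
    def wrapper(*args, **kwargs):
        ret = method(*args, **kwargs)   # run the original PyTorch call first
        ctx.method_args = args          # expose the call to the converter
        ctx.method_kwargs = kwargs
        ctx.method_return = ret
        converter(ctx)                  # mirror the op in the TensorRT network
        return ret
    return wrapper
```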
From 581368a9b4a87ad1491989119be16d9e787b451e Mon Sep 17 00:00:00 2001 From: John Date: Sun, 28 Apr 2019 14:49:55 -0400 Subject: [PATCH 044/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5ff3cba3..58f6162d 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# torch2trt - A PyTorch -> TensorRT Converter +# torch2trt torch2trt is a PyTorch to TensorRT converter which utilizes the TensorRT Python API. The converter is From f6acbb6102938afeaac3f6d1ba9a4d09b59023dd Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sun, 28 Apr 2019 13:25:34 -0700 Subject: [PATCH 045/355] fixed bug with bias=False for non-fp32 --- torch2trt.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/torch2trt.py b/torch2trt.py index 97c2a0be..aa455a60 100644 --- a/torch2trt.py +++ b/torch2trt.py @@ -266,7 +266,9 @@ def convert_Conv2d(ctx): if not isinstance(padding, tuple): padding = (padding, ) * 2 - bias = trt.Weights() + kernel = module.weight.detach().cpu().numpy() + + bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype)) if module.bias is not None: bias = module.bias.detach().cpu().numpy() @@ -274,7 +276,7 @@ def convert_Conv2d(ctx): input=trt_input, num_output_maps=module.out_channels, kernel_shape=kernel_size, - kernel=module.weight.detach().cpu().numpy(), + kernel=kernel, bias=bias) layer.stride = stride layer.padding = padding @@ -440,10 +442,11 @@ def convert_BatchNorm2d(ctx): input = ctx.method_args[1] output = ctx.method_return trt_input = ctx.trt_tensors[input.__hash__()] - + scale = module.weight.detach().cpu().numpy() / np.sqrt(module.running_var.detach().cpu().numpy() + module.eps) bias = module.bias.detach().cpu().numpy() - module.running_mean.detach().cpu().numpy() * scale power = np.ones_like(scale) + layer = ctx.network.add_scale(trt_input, trt.ScaleMode.CHANNEL, bias, scale, power) ctx.trt_tensors[output.__hash__()] = layer.get_output(0) From ebfa1e2bf876391ffb12460fce1bf9bdf7c9a0bd Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 2 May 2019 11:00:57 -0700 Subject: [PATCH 046/355] removed dict map --- torch2trt.py | 72 ++++++++++++++++++++++------------------------------ 1 file changed, 30 insertions(+), 42 deletions(-) diff --git a/torch2trt.py b/torch2trt.py index aa455a60..83fe3c5d 100644 --- a/torch2trt.py +++ b/torch2trt.py @@ -104,7 +104,6 @@ def __exit__(self, type, val, tb): class ConversionContext(object): def __init__(self, network, converters=CONVERTERS): self.network = network - self.trt_tensors = {} self.method_args = None self.method_kwargs = None self.method_return = None @@ -128,14 +127,14 @@ def add_inputs(self, torch_inputs, names=None): self.input_names = names for i, torch_input in enumerate(torch_inputs): - if torch_input.__hash__() not in self.trt_tensors: + if not hasattr(torch_input, '_trt'): trt_tensor = self.network.add_input( name=names[i], shape=tuple(torch_input.shape)[1:], dtype=torch_dtype_to_trt(torch_input.dtype), ) trt_tensor.location = torch_device_to_trt(torch_input.device) - self.trt_tensors[torch_input.__hash__()] = trt_tensor + torch_input._trt = trt_tensor def mark_outputs(self, torch_outputs, names=None): if names is None: @@ -143,7 +142,7 @@ def mark_outputs(self, torch_outputs, names=None): self.output_names = names for i, torch_output in enumerate(torch_outputs): - trt_tensor = self.trt_tensors[torch_output.__hash__()] + trt_tensor = torch_output._trt trt_tensor.name = names[i] trt_tensor.location = 
torch_device_to_trt(torch_output.device) trt_tensor.dtype = torch_dtype_to_trt(torch_output.dtype) @@ -236,15 +235,14 @@ def convert_Linear(ctx): module = ctx.method_args[0] input = ctx.method_args[1] output = ctx.method_return - trt_input = ctx.trt_tensors[input.__hash__()] layer = ctx.network.add_fully_connected( - input=trt_input, + input=input._trt, num_outputs=module.out_features, kernel=module.weight.detach().cpu().numpy(), bias=module.bias.detach().cpu().numpy()) - ctx.trt_tensors[output.__hash__()] = layer.get_output(0) + output._trt = layer.get_output(0) @tensorrt_converter('torch.nn.Conv2d.forward') @@ -252,7 +250,6 @@ def convert_Conv2d(ctx): module = ctx.method_args[0] input = ctx.method_args[1] output = ctx.method_return - trt_input = ctx.trt_tensors[input.__hash__()] kernel_size = module.kernel_size if not isinstance(kernel_size, tuple): @@ -273,7 +270,7 @@ def convert_Conv2d(ctx): bias = module.bias.detach().cpu().numpy() layer = ctx.network.add_convolution( - input=trt_input, + input=input._trt, num_output_maps=module.out_channels, kernel_shape=kernel_size, kernel=kernel, @@ -284,7 +281,7 @@ def convert_Conv2d(ctx): if module.groups is not None: layer.num_groups = module.groups - ctx.trt_tensors[output.__hash__()] = layer.get_output(0) + output._trt = layer.get_output(0) @tensorrt_converter('torch.nn.MaxPool2d.forward') @@ -292,7 +289,6 @@ def convert_MaxPool2d(ctx): module = ctx.method_args[0] input = ctx.method_args[1] output = ctx.method_return - trt_input = ctx.trt_tensors[input.__hash__()] kernel_size = module.kernel_size if not isinstance(kernel_size, tuple): @@ -307,11 +303,11 @@ def convert_MaxPool2d(ctx): padding = (padding, ) * 2 layer = ctx.network.add_pooling( - input=trt_input, type=trt.PoolingType.MAX, window_size=kernel_size) + input=input._trt, type=trt.PoolingType.MAX, window_size=kernel_size) layer.stride = stride layer.padding = padding - ctx.trt_tensors[output.__hash__()] = layer.get_output(0) + output._trt = layer.get_output(0) @tensorrt_converter('torch.nn.AvgPool2d.forward') @@ -319,7 +315,6 @@ def convert_AvgPool2d(ctx): module = ctx.method_args[0] input = ctx.method_args[1] output = ctx.method_return - trt_input = ctx.trt_tensors[input.__hash__()] kernel_size = module.kernel_size if not isinstance(kernel_size, tuple): @@ -332,12 +327,12 @@ def convert_AvgPool2d(ctx): padding = (padding, ) * 2 layer = ctx.network.add_pooling( - input=trt_input, type=trt.PoolingType.AVERAGE, window_size=kernel_size) + input=input._trt, type=trt.PoolingType.AVERAGE, window_size=kernel_size) layer.stride = stride layer.padding = padding layer.average_count_excludes_padding = not module.count_include_pad - ctx.trt_tensors[output.__hash__()] = layer.get_output(0) + output._trt = layer.get_output(0) @tensorrt_converter('torch.nn.AdaptiveAvgPool2d.forward') @@ -345,20 +340,19 @@ def convert_AdaptiveAvgPool2d(ctx): module = ctx.method_args[0] input = ctx.method_args[1] output = ctx.method_return - trt_input = ctx.trt_tensors[input.__hash__()] output_size = module.output_size if not isinstance(output_size, tuple): output_size = (output_size, ) * 2 - stride = (trt_input.shape[-2] // output_size[-2], trt_input.shape[-1] // output_size[-1]) + stride = (input._trt.shape[-2] // output_size[-2], input._trt.shape[-1] // output_size[-1]) kernel_size = stride layer = ctx.network.add_pooling( - input=trt_input, type=trt.PoolingType.AVERAGE, window_size=kernel_size) + input=input._trt, type=trt.PoolingType.AVERAGE, window_size=kernel_size) layer.stride = stride - 
ctx.trt_tensors[output.__hash__()] = layer.get_output(0) + output._trt = layer.get_output(0) @tensorrt_converter('torch.nn.functional.adaptive_avg_pool2d') @@ -371,10 +365,9 @@ def convert_adaptive_avg_pool2d(ctx): def convert_ReLU(ctx): input = ctx.method_args[1] output = ctx.method_return - trt_input = ctx.trt_tensors[input.__hash__()] layer = ctx.network.add_activation( - input=trt_input, type=trt.ActivationType.RELU) - ctx.trt_tensors[output.__hash__()] = layer.get_output(0) + input=input._trt, type=trt.ActivationType.RELU) + output._trt = layer.get_output(0) @tensorrt_converter('torch.nn.functional.relu') @@ -387,17 +380,16 @@ def convert_relu(ctx): def convert_ReLU6(ctx): input = ctx.method_args[1] output = ctx.method_return - trt_input = ctx.trt_tensors[input.__hash__()] layer = ctx.network.add_activation( - input=trt_input, type=trt.ActivationType.RELU) - shape = (1, ) * len(trt_input.shape) # broadcast all dimensions - tensor = 6.0 * torch.ones(shape, dtype=torch_dtype_from_trt(trt_input.dtype)).cpu().numpy() + input=input._trt, type=trt.ActivationType.RELU) + shape = (1, ) * len(input._trt.shape) # broadcast all dimensions + tensor = 6.0 * torch.ones(shape, dtype=torch_dtype_from_trt(input._trt.dtype)).cpu().numpy() trt_6 = ctx.network.add_constant(shape, tensor) layer = ctx.network.add_elementwise( layer.get_output(0), trt_6.get_output(0), trt.ElementWiseOperation.MIN) - ctx.trt_tensors[output.__hash__()] = layer.get_output(0) + output._trt = layer.get_output(0) @tensorrt_converter('torch.nn.functional.relu6') @@ -410,11 +402,10 @@ def convert_relu6(ctx): def convert_LogSoftmax(ctx): input = ctx.method_args[1] output = ctx.method_return - trt_input = ctx.trt_tensors[input.__hash__()] - layer = ctx.network.add_softmax(input=trt_input) + layer = ctx.network.add_softmax(input=input._trt) layer = ctx.network.add_unary(input=layer.get_output(0), op=trt.UnaryOperation.LOG) - ctx.trt_tensors[output.__hash__()] = layer.get_output(0) + output._trt = layer.get_output(0) @tensorrt_converter('torch.nn.Dropout.forward') @@ -423,7 +414,7 @@ def convert_LogSoftmax(ctx): def convert_Identity(ctx): input = ctx.method_args[1] output = ctx.method_return - ctx.trt_tensors[output.__hash__()] = ctx.trt_tensors[input.__hash__()] + output._trt = input._trt @tensorrt_converter('torch.Tensor.view') @@ -433,7 +424,7 @@ def convert_Identity(ctx): def convert_identity(ctx): input = ctx.method_args[0] output = ctx.method_return - ctx.trt_tensors[output.__hash__()] = ctx.trt_tensors[input.__hash__()] + output._trt = input._trt @tensorrt_converter('torch.nn.BatchNorm2d.forward') @@ -441,15 +432,14 @@ def convert_BatchNorm2d(ctx): module = ctx.method_args[0] input = ctx.method_args[1] output = ctx.method_return - trt_input = ctx.trt_tensors[input.__hash__()] scale = module.weight.detach().cpu().numpy() / np.sqrt(module.running_var.detach().cpu().numpy() + module.eps) bias = module.bias.detach().cpu().numpy() - module.running_mean.detach().cpu().numpy() * scale power = np.ones_like(scale) - layer = ctx.network.add_scale(trt_input, trt.ScaleMode.CHANNEL, bias, scale, power) + layer = ctx.network.add_scale(input._trt, trt.ScaleMode.CHANNEL, bias, scale, power) - ctx.trt_tensors[output.__hash__()] = layer.get_output(0) + output._trt = layer.get_output(0) # TENSOR METHOD CONVERTERS @@ -465,11 +455,11 @@ def convert_cat(ctx): dim = ctx.method_args[1] output = ctx.method_return - trt_inputs = [ctx.trt_tensors[i.__hash__()] for i in inputs] + trt_inputs = [i._trt for i in inputs] layer = 
ctx.network.add_concatenation(inputs=trt_inputs)
    layer.axis = dim - 1
 
-    ctx.trt_tensors[output.__hash__()] = layer.get_output(0)
+    output._trt = layer.get_output(0)
 
 
 @tensorrt_converter('torch.Tensor.__iadd__')
@@ -478,7 +468,5 @@ def convert_add(ctx):
     input_a = ctx.method_args[0]
     input_b = ctx.method_args[1]
     output = ctx.method_return
-    trt_input_a = ctx.trt_tensors[input_a.__hash__()]
-    trt_input_b = ctx.trt_tensors[input_b.__hash__()]
-    layer = ctx.network.add_elementwise(trt_input_a, trt_input_b, trt.ElementWiseOperation.SUM)
-    ctx.trt_tensors[output.__hash__()] = layer.get_output(0)
+    layer = ctx.network.add_elementwise(input_a._trt, input_b._trt, trt.ElementWiseOperation.SUM)
+    output._trt = layer.get_output(0)
\ No newline at end of file

From b0105047b48f31d8de43101a8ee4c254d2033313 Mon Sep 17 00:00:00 2001
From: John
Date: Thu, 2 May 2019 14:30:09 -0400
Subject: [PATCH 047/355] Update README.md

---
 README.md | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index 58f6162d..3d59db99 100644
--- a/README.md
+++ b/README.md
@@ -75,10 +75,12 @@ This converter works by attaching conversion functions (like ``convert_ReLU``) t
 PyTorch functional calls (like ``torch.nn.ReLU.forward``). The sample input data is passed
 through the network, just as before, except now whenever a registered function (``torch.nn.ReLU.forward``)
 is encountered, the corresponding converter (``convert_ReLU``) is also called afterwards. The converter
-is passed some information, like the arguments to the original PyTorch function, the TensorRT
-network that is currently being constructed, and a dictionary mapping of PyTorch->TensorRT tensors that have already been
-added to the TensorRT network. The converter then uses this information to add layers to the TensorRT network, and
-add any new TensorRT tensors to the growing dictionary.
+is passed the arguments and return value of the original PyTorch function, as well as the TensorRT
+network that is being constructed. The input tensors to the original PyTorch function are modified to
+have an attribute ``_trt``, which is the TensorRT counterpart to the PyTorch tensor. The conversion function
+uses this ``_trt`` to add layers to the TensorRT network, and then sets the ``_trt`` attribute for
+relevant output tensors. Once the model is fully executed, the final tensors returned are marked as outputs
+of the TensorRT network, and the network is built.
 
 ### How to add (or override) a converter
 
@@ -91,11 +93,10 @@ from torch2trt import tensorrt_converter
 
 @tensorrt_converter('torch.nn.ReLU.forward')
 def convert_ReLU(ctx):
-    input_tensor = ctx.method_args[1]
-    output_tensor = ctx.method_return
-    trt_input = ctx.trt_tensors[input_tensor.__hash__()]
-    layer = ctx.network.add_activation(input=trt_input, type=trt.ActivationType.RELU)
-    ctx.trt_tensors[output_tensor.__hash__()] = layer.get_output(0)
+    input = ctx.method_args[1]
+    output = ctx.method_return
+    layer = ctx.network.add_activation(input=input._trt, type=trt.ActivationType.RELU)
+    output._trt = layer.get_output(0)
 ```
 
 The converter takes one argument, a ``ConversionContext``, which will contain
 the following
 
 * ``ctx.network`` - The TensorRT network that is being constructed.
-* ``ctx.method_args`` - Positional arguments that were passed to the specified PyTorch function.
+* ``ctx.method_args`` - Positional arguments that were passed to the specified PyTorch function. The ``_trt`` attribute is set for relevant input tensors.
* ``ctx.method_kwargs`` - Keyword arguments that were passed to the specified PyTorch function. -* ``ctx.method_return`` - The value returned by the specified PyTorch function. -* ``ctx.trt_tensors`` - A dictionary mapping PyTorch tensors (by hash value) to TensorRT tensors. The - converter must the set values for any output Tensors. Otherwise, if a later function uses - the PyTorch tensor, and there is not an associated TensorRT tensor in the map, results - may be unexpected. +* ``ctx.method_return`` - The value returned by the specified PyTorch function. The converter must set the ``_trt`` attribute where relevant. Please see the ``torch2trt.py`` module for more examples. From 7eb06b6d3d789ac964dbc146dde9b1a2212b4152 Mon Sep 17 00:00:00 2001 From: John Date: Thu, 2 May 2019 14:30:51 -0400 Subject: [PATCH 048/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3d59db99..2f5afe6b 100644 --- a/README.md +++ b/README.md @@ -80,7 +80,7 @@ network that is being constructed. The input tensors to the original PyTorch fu have an attribute ``_trt``, which is the TensorRT counterpart to the PyTorch tensor. The conversion function uses this ``_trt`` to add layers to the TensorRT network, and then sets the ``_trt`` attribute for relevant output tensors. Once the model is fully executed, the final tensors returns are marked as outputs -of the TensorRT network, and the network is built. +of the TensorRT network, and the optimized TensorRT engine is built. ### How to add (or override) a converter From 027a9997d585a5fc589c1ebb5f87d5fae311c1bc Mon Sep 17 00:00:00 2001 From: John Date: Thu, 2 May 2019 14:32:11 -0400 Subject: [PATCH 049/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2f5afe6b..9cf2782a 100644 --- a/README.md +++ b/README.md @@ -84,7 +84,7 @@ of the TensorRT network, and the optimized TensorRT engine is built. ### How to add (or override) a converter -Here we show how to add an example converter using the TensorRT +Here we show how to add a converter for the ``ReLU`` module using the TensorRT python API. ```python From 5af20cb1d0040e4c1fee5bc1465ce7b893da2136 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 2 May 2019 12:48:52 -0700 Subject: [PATCH 050/355] added conv2d transpose. 
does not support output_padding > 0 --- torch2trt.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/torch2trt.py b/torch2trt.py index 83fe3c5d..29b3df1e 100644 --- a/torch2trt.py +++ b/torch2trt.py @@ -283,6 +283,45 @@ def convert_Conv2d(ctx): output._trt = layer.get_output(0) + +@tensorrt_converter('torch.nn.ConvTranspose2d.forward') +def convert_ConvTranspose2d(ctx): + module = ctx.method_args[0] + input = ctx.method_args[1] + output = ctx.method_return + + kernel_size = module.kernel_size + if not isinstance(kernel_size, tuple): + kernel_size = (kernel_size, ) * 2 + + stride = module.stride + if not isinstance(stride, tuple): + stride = (stride, ) * 2 + + padding = module.padding + if not isinstance(padding, tuple): + padding = (padding, ) * 2 + + kernel = module.weight.detach().cpu().numpy() + + bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype)) + if module.bias is not None: + bias = module.bias.detach().cpu().numpy() + + layer = ctx.network.add_deconvolution( + input=input._trt, + num_output_maps=module.out_channels, + kernel_shape=kernel_size, + kernel=kernel, + bias=bias) + layer.stride = stride + layer.padding = padding + + if module.groups is not None: + layer.num_groups = module.groups + + output._trt = layer.get_output(0) + @tensorrt_converter('torch.nn.MaxPool2d.forward') def convert_MaxPool2d(ctx): From a1493ab6a081b60b7ab27bdd8e48289de2ade212 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 16 May 2019 17:23:23 -0700 Subject: [PATCH 051/355] live example --- notebooks/resnet18/imagenet_labels.json | 1002 +++++++++++++++++++++++ notebooks/resnet18/resnet18.ipynb | 250 ++++++ torch2trt.py | 46 +- 3 files changed, 1291 insertions(+), 7 deletions(-) create mode 100644 notebooks/resnet18/imagenet_labels.json create mode 100644 notebooks/resnet18/resnet18.ipynb diff --git a/notebooks/resnet18/imagenet_labels.json b/notebooks/resnet18/imagenet_labels.json new file mode 100644 index 00000000..d3314cfa --- /dev/null +++ b/notebooks/resnet18/imagenet_labels.json @@ -0,0 +1,1002 @@ +[ +"tench, Tinca tinca", +"goldfish, Carassius auratus", +"great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias", +"tiger shark, Galeocerdo cuvieri", +"hammerhead, hammerhead shark", +"electric ray, crampfish, numbfish, torpedo", +"stingray", +"cock", +"hen", +"ostrich, Struthio camelus", +"brambling, Fringilla montifringilla", +"goldfinch, Carduelis carduelis", +"house finch, linnet, Carpodacus mexicanus", +"junco, snowbird", +"indigo bunting, indigo finch, indigo bird, Passerina cyanea", +"robin, American robin, Turdus migratorius", +"bulbul", +"jay", +"magpie", +"chickadee", +"water ouzel, dipper", +"kite", +"bald eagle, American eagle, Haliaeetus leucocephalus", +"vulture", +"great grey owl, great gray owl, Strix nebulosa", +"European fire salamander, Salamandra salamandra", +"common newt, Triturus vulgaris", +"eft", +"spotted salamander, Ambystoma maculatum", +"axolotl, mud puppy, Ambystoma mexicanum", +"bullfrog, Rana catesbeiana", +"tree frog, tree-frog", +"tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui", +"loggerhead, loggerhead turtle, Caretta caretta", +"leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea", +"mud turtle", +"terrapin", +"box turtle, box tortoise", +"banded gecko", +"common iguana, iguana, Iguana iguana", +"American chameleon, anole, Anolis carolinensis", +"whiptail, whiptail lizard", +"agama", +"frilled lizard, Chlamydosaurus kingi", +"alligator lizard", +"Gila 
monster, Heloderma suspectum", +"green lizard, Lacerta viridis", +"African chameleon, Chamaeleo chamaeleon", +"Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis", +"African crocodile, Nile crocodile, Crocodylus niloticus", +"American alligator, Alligator mississipiensis", +"triceratops", +"thunder snake, worm snake, Carphophis amoenus", +"ringneck snake, ring-necked snake, ring snake", +"hognose snake, puff adder, sand viper", +"green snake, grass snake", +"king snake, kingsnake", +"garter snake, grass snake", +"water snake", +"vine snake", +"night snake, Hypsiglena torquata", +"boa constrictor, Constrictor constrictor", +"rock python, rock snake, Python sebae", +"Indian cobra, Naja naja", +"green mamba", +"sea snake", +"horned viper, cerastes, sand viper, horned asp, Cerastes cornutus", +"diamondback, diamondback rattlesnake, Crotalus adamanteus", +"sidewinder, horned rattlesnake, Crotalus cerastes", +"trilobite", +"harvestman, daddy longlegs, Phalangium opilio", +"scorpion", +"black and gold garden spider, Argiope aurantia", +"barn spider, Araneus cavaticus", +"garden spider, Aranea diademata", +"black widow, Latrodectus mactans", +"tarantula", +"wolf spider, hunting spider", +"tick", +"centipede", +"black grouse", +"ptarmigan", +"ruffed grouse, partridge, Bonasa umbellus", +"prairie chicken, prairie grouse, prairie fowl", +"peacock", +"quail", +"partridge", +"African grey, African gray, Psittacus erithacus", +"macaw", +"sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita", +"lorikeet", +"coucal", +"bee eater", +"hornbill", +"hummingbird", +"jacamar", +"toucan", +"drake", +"red-breasted merganser, Mergus serrator", +"goose", +"black swan, Cygnus atratus", +"tusker", +"echidna, spiny anteater, anteater", +"platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus", +"wallaby, brush kangaroo", +"koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus", +"wombat", +"jellyfish", +"sea anemone, anemone", +"brain coral", +"flatworm, platyhelminth", +"nematode, nematode worm, roundworm", +"conch", +"snail", +"slug", +"sea slug, nudibranch", +"chiton, coat-of-mail shell, sea cradle, polyplacophore", +"chambered nautilus, pearly nautilus, nautilus", +"Dungeness crab, Cancer magister", +"rock crab, Cancer irroratus", +"fiddler crab", +"king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica", +"American lobster, Northern lobster, Maine lobster, Homarus americanus", +"spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish", +"crayfish, crawfish, crawdad, crawdaddy", +"hermit crab", +"isopod", +"white stork, Ciconia ciconia", +"black stork, Ciconia nigra", +"spoonbill", +"flamingo", +"little blue heron, Egretta caerulea", +"American egret, great white heron, Egretta albus", +"bittern", +"crane", +"limpkin, Aramus pictus", +"European gallinule, Porphyrio porphyrio", +"American coot, marsh hen, mud hen, water hen, Fulica americana", +"bustard", +"ruddy turnstone, Arenaria interpres", +"red-backed sandpiper, dunlin, Erolia alpina", +"redshank, Tringa totanus", +"dowitcher", +"oystercatcher, oyster catcher", +"pelican", +"king penguin, Aptenodytes patagonica", +"albatross, mollymawk", +"grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus", +"killer whale, killer, orca, grampus, sea wolf, Orcinus orca", +"dugong, Dugong dugon", +"sea lion", +"Chihuahua", +"Japanese spaniel", +"Maltese dog, Maltese terrier, Maltese", +"Pekinese, Pekingese, Peke", 
+"Shih-Tzu", +"Blenheim spaniel", +"papillon", +"toy terrier", +"Rhodesian ridgeback", +"Afghan hound, Afghan", +"basset, basset hound", +"beagle", +"bloodhound, sleuthhound", +"bluetick", +"black-and-tan coonhound", +"Walker hound, Walker foxhound", +"English foxhound", +"redbone", +"borzoi, Russian wolfhound", +"Irish wolfhound", +"Italian greyhound", +"whippet", +"Ibizan hound, Ibizan Podenco", +"Norwegian elkhound, elkhound", +"otterhound, otter hound", +"Saluki, gazelle hound", +"Scottish deerhound, deerhound", +"Weimaraner", +"Staffordshire bullterrier, Staffordshire bull terrier", +"American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier", +"Bedlington terrier", +"Border terrier", +"Kerry blue terrier", +"Irish terrier", +"Norfolk terrier", +"Norwich terrier", +"Yorkshire terrier", +"wire-haired fox terrier", +"Lakeland terrier", +"Sealyham terrier, Sealyham", +"Airedale, Airedale terrier", +"cairn, cairn terrier", +"Australian terrier", +"Dandie Dinmont, Dandie Dinmont terrier", +"Boston bull, Boston terrier", +"miniature schnauzer", +"giant schnauzer", +"standard schnauzer", +"Scotch terrier, Scottish terrier, Scottie", +"Tibetan terrier, chrysanthemum dog", +"silky terrier, Sydney silky", +"soft-coated wheaten terrier", +"West Highland white terrier", +"Lhasa, Lhasa apso", +"flat-coated retriever", +"curly-coated retriever", +"golden retriever", +"Labrador retriever", +"Chesapeake Bay retriever", +"German short-haired pointer", +"vizsla, Hungarian pointer", +"English setter", +"Irish setter, red setter", +"Gordon setter", +"Brittany spaniel", +"clumber, clumber spaniel", +"English springer, English springer spaniel", +"Welsh springer spaniel", +"cocker spaniel, English cocker spaniel, cocker", +"Sussex spaniel", +"Irish water spaniel", +"kuvasz", +"schipperke", +"groenendael", +"malinois", +"briard", +"kelpie", +"komondor", +"Old English sheepdog, bobtail", +"Shetland sheepdog, Shetland sheep dog, Shetland", +"collie", +"Border collie", +"Bouvier des Flandres, Bouviers des Flandres", +"Rottweiler", +"German shepherd, German shepherd dog, German police dog, alsatian", +"Doberman, Doberman pinscher", +"miniature pinscher", +"Greater Swiss Mountain dog", +"Bernese mountain dog", +"Appenzeller", +"EntleBucher", +"boxer", +"bull mastiff", +"Tibetan mastiff", +"French bulldog", +"Great Dane", +"Saint Bernard, St Bernard", +"Eskimo dog, husky", +"malamute, malemute, Alaskan malamute", +"Siberian husky", +"dalmatian, coach dog, carriage dog", +"affenpinscher, monkey pinscher, monkey dog", +"basenji", +"pug, pug-dog", +"Leonberg", +"Newfoundland, Newfoundland dog", +"Great Pyrenees", +"Samoyed, Samoyede", +"Pomeranian", +"chow, chow chow", +"keeshond", +"Brabancon griffon", +"Pembroke, Pembroke Welsh corgi", +"Cardigan, Cardigan Welsh corgi", +"toy poodle", +"miniature poodle", +"standard poodle", +"Mexican hairless", +"timber wolf, grey wolf, gray wolf, Canis lupus", +"white wolf, Arctic wolf, Canis lupus tundrarum", +"red wolf, maned wolf, Canis rufus, Canis niger", +"coyote, prairie wolf, brush wolf, Canis latrans", +"dingo, warrigal, warragal, Canis dingo", +"dhole, Cuon alpinus", +"African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus", +"hyena, hyaena", +"red fox, Vulpes vulpes", +"kit fox, Vulpes macrotis", +"Arctic fox, white fox, Alopex lagopus", +"grey fox, gray fox, Urocyon cinereoargenteus", +"tabby, tabby cat", +"tiger cat", +"Persian cat", +"Siamese cat, Siamese", +"Egyptian cat", +"cougar, puma, catamount, mountain lion, 
painter, panther, Felis concolor", +"lynx, catamount", +"leopard, Panthera pardus", +"snow leopard, ounce, Panthera uncia", +"jaguar, panther, Panthera onca, Felis onca", +"lion, king of beasts, Panthera leo", +"tiger, Panthera tigris", +"cheetah, chetah, Acinonyx jubatus", +"brown bear, bruin, Ursus arctos", +"American black bear, black bear, Ursus americanus, Euarctos americanus", +"ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus", +"sloth bear, Melursus ursinus, Ursus ursinus", +"mongoose", +"meerkat, mierkat", +"tiger beetle", +"ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle", +"ground beetle, carabid beetle", +"long-horned beetle, longicorn, longicorn beetle", +"leaf beetle, chrysomelid", +"dung beetle", +"rhinoceros beetle", +"weevil", +"fly", +"bee", +"ant, emmet, pismire", +"grasshopper, hopper", +"cricket", +"walking stick, walkingstick, stick insect", +"cockroach, roach", +"mantis, mantid", +"cicada, cicala", +"leafhopper", +"lacewing, lacewing fly", +"dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk", +"damselfly", +"admiral", +"ringlet, ringlet butterfly", +"monarch, monarch butterfly, milkweed butterfly, Danaus plexippus", +"cabbage butterfly", +"sulphur butterfly, sulfur butterfly", +"lycaenid, lycaenid butterfly", +"starfish, sea star", +"sea urchin", +"sea cucumber, holothurian", +"wood rabbit, cottontail, cottontail rabbit", +"hare", +"Angora, Angora rabbit", +"hamster", +"porcupine, hedgehog", +"fox squirrel, eastern fox squirrel, Sciurus niger", +"marmot", +"beaver", +"guinea pig, Cavia cobaya", +"sorrel", +"zebra", +"hog, pig, grunter, squealer, Sus scrofa", +"wild boar, boar, Sus scrofa", +"warthog", +"hippopotamus, hippo, river horse, Hippopotamus amphibius", +"ox", +"water buffalo, water ox, Asiatic buffalo, Bubalus bubalis", +"bison", +"ram, tup", +"bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis", +"ibex, Capra ibex", +"hartebeest", +"impala, Aepyceros melampus", +"gazelle", +"Arabian camel, dromedary, Camelus dromedarius", +"llama", +"weasel", +"mink", +"polecat, fitch, foulmart, foumart, Mustela putorius", +"black-footed ferret, ferret, Mustela nigripes", +"otter", +"skunk, polecat, wood pussy", +"badger", +"armadillo", +"three-toed sloth, ai, Bradypus tridactylus", +"orangutan, orang, orangutang, Pongo pygmaeus", +"gorilla, Gorilla gorilla", +"chimpanzee, chimp, Pan troglodytes", +"gibbon, Hylobates lar", +"siamang, Hylobates syndactylus, Symphalangus syndactylus", +"guenon, guenon monkey", +"patas, hussar monkey, Erythrocebus patas", +"baboon", +"macaque", +"langur", +"colobus, colobus monkey", +"proboscis monkey, Nasalis larvatus", +"marmoset", +"capuchin, ringtail, Cebus capucinus", +"howler monkey, howler", +"titi, titi monkey", +"spider monkey, Ateles geoffroyi", +"squirrel monkey, Saimiri sciureus", +"Madagascar cat, ring-tailed lemur, Lemur catta", +"indri, indris, Indri indri, Indri brevicaudatus", +"Indian elephant, Elephas maximus", +"African elephant, Loxodonta africana", +"lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens", +"giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca", +"barracouta, snoek", +"eel", +"coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch", +"rock beauty, Holocanthus tricolor", +"anemone fish", +"sturgeon", +"gar, garfish, garpike, billfish, Lepisosteus osseus", +"lionfish", +"puffer, pufferfish, blowfish, globefish", +"abacus", 
+"abaya", +"academic gown, academic robe, judge's robe", +"accordion, piano accordion, squeeze box", +"acoustic guitar", +"aircraft carrier, carrier, flattop, attack aircraft carrier", +"airliner", +"airship, dirigible", +"altar", +"ambulance", +"amphibian, amphibious vehicle", +"analog clock", +"apiary, bee house", +"apron", +"ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin", +"assault rifle, assault gun", +"backpack, back pack, knapsack, packsack, rucksack, haversack", +"bakery, bakeshop, bakehouse", +"balance beam, beam", +"balloon", +"ballpoint, ballpoint pen, ballpen, Biro", +"Band Aid", +"banjo", +"bannister, banister, balustrade, balusters, handrail", +"barbell", +"barber chair", +"barbershop", +"barn", +"barometer", +"barrel, cask", +"barrow, garden cart, lawn cart, wheelbarrow", +"baseball", +"basketball", +"bassinet", +"bassoon", +"bathing cap, swimming cap", +"bath towel", +"bathtub, bathing tub, bath, tub", +"beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon", +"beacon, lighthouse, beacon light, pharos", +"beaker", +"bearskin, busby, shako", +"beer bottle", +"beer glass", +"bell cote, bell cot", +"bib", +"bicycle-built-for-two, tandem bicycle, tandem", +"bikini, two-piece", +"binder, ring-binder", +"binoculars, field glasses, opera glasses", +"birdhouse", +"boathouse", +"bobsled, bobsleigh, bob", +"bolo tie, bolo, bola tie, bola", +"bonnet, poke bonnet", +"bookcase", +"bookshop, bookstore, bookstall", +"bottlecap", +"bow", +"bow tie, bow-tie, bowtie", +"brass, memorial tablet, plaque", +"brassiere, bra, bandeau", +"breakwater, groin, groyne, mole, bulwark, seawall, jetty", +"breastplate, aegis, egis", +"broom", +"bucket, pail", +"buckle", +"bulletproof vest", +"bullet train, bullet", +"butcher shop, meat market", +"cab, hack, taxi, taxicab", +"caldron, cauldron", +"candle, taper, wax light", +"cannon", +"canoe", +"can opener, tin opener", +"cardigan", +"car mirror", +"carousel, carrousel, merry-go-round, roundabout, whirligig", +"carpenter's kit, tool kit", +"carton", +"car wheel", +"cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM", +"cassette", +"cassette player", +"castle", +"catamaran", +"CD player", +"cello, violoncello", +"cellular telephone, cellular phone, cellphone, cell, mobile phone", +"chain", +"chainlink fence", +"chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour", +"chain saw, chainsaw", +"chest", +"chiffonier, commode", +"chime, bell, gong", +"china cabinet, china closet", +"Christmas stocking", +"church, church building", +"cinema, movie theater, movie theatre, movie house, picture palace", +"cleaver, meat cleaver, chopper", +"cliff dwelling", +"cloak", +"clog, geta, patten, sabot", +"cocktail shaker", +"coffee mug", +"coffeepot", +"coil, spiral, volute, whorl, helix", +"combination lock", +"computer keyboard, keypad", +"confectionery, confectionary, candy store", +"container ship, containership, container vessel", +"convertible", +"corkscrew, bottle screw", +"cornet, horn, trumpet, trump", +"cowboy boot", +"cowboy hat, ten-gallon hat", +"cradle", +"crane", +"crash helmet", +"crate", +"crib, cot", +"Crock Pot", +"croquet ball", +"crutch", +"cuirass", +"dam, dike, dyke", +"desk", +"desktop computer", +"dial telephone, dial phone", +"diaper, nappy, napkin", +"digital clock", +"digital watch", +"dining table, board", +"dishrag, dishcloth", +"dishwasher, dish washer, dishwashing 
machine", +"disk brake, disc brake", +"dock, dockage, docking facility", +"dogsled, dog sled, dog sleigh", +"dome", +"doormat, welcome mat", +"drilling platform, offshore rig", +"drum, membranophone, tympan", +"drumstick", +"dumbbell", +"Dutch oven", +"electric fan, blower", +"electric guitar", +"electric locomotive", +"entertainment center", +"envelope", +"espresso maker", +"face powder", +"feather boa, boa", +"file, file cabinet, filing cabinet", +"fireboat", +"fire engine, fire truck", +"fire screen, fireguard", +"flagpole, flagstaff", +"flute, transverse flute", +"folding chair", +"football helmet", +"forklift", +"fountain", +"fountain pen", +"four-poster", +"freight car", +"French horn, horn", +"frying pan, frypan, skillet", +"fur coat", +"garbage truck, dustcart", +"gasmask, respirator, gas helmet", +"gas pump, gasoline pump, petrol pump, island dispenser", +"goblet", +"go-kart", +"golf ball", +"golfcart, golf cart", +"gondola", +"gong, tam-tam", +"gown", +"grand piano, grand", +"greenhouse, nursery, glasshouse", +"grille, radiator grille", +"grocery store, grocery, food market, market", +"guillotine", +"hair slide", +"hair spray", +"half track", +"hammer", +"hamper", +"hand blower, blow dryer, blow drier, hair dryer, hair drier", +"hand-held computer, hand-held microcomputer", +"handkerchief, hankie, hanky, hankey", +"hard disc, hard disk, fixed disk", +"harmonica, mouth organ, harp, mouth harp", +"harp", +"harvester, reaper", +"hatchet", +"holster", +"home theater, home theatre", +"honeycomb", +"hook, claw", +"hoopskirt, crinoline", +"horizontal bar, high bar", +"horse cart, horse-cart", +"hourglass", +"iPod", +"iron, smoothing iron", +"jack-o'-lantern", +"jean, blue jean, denim", +"jeep, landrover", +"jersey, T-shirt, tee shirt", +"jigsaw puzzle", +"jinrikisha, ricksha, rickshaw", +"joystick", +"kimono", +"knee pad", +"knot", +"lab coat, laboratory coat", +"ladle", +"lampshade, lamp shade", +"laptop, laptop computer", +"lawn mower, mower", +"lens cap, lens cover", +"letter opener, paper knife, paperknife", +"library", +"lifeboat", +"lighter, light, igniter, ignitor", +"limousine, limo", +"liner, ocean liner", +"lipstick, lip rouge", +"Loafer", +"lotion", +"loudspeaker, speaker, speaker unit, loudspeaker system, speaker system", +"loupe, jeweler's loupe", +"lumbermill, sawmill", +"magnetic compass", +"mailbag, postbag", +"mailbox, letter box", +"maillot", +"maillot, tank suit", +"manhole cover", +"maraca", +"marimba, xylophone", +"mask", +"matchstick", +"maypole", +"maze, labyrinth", +"measuring cup", +"medicine chest, medicine cabinet", +"megalith, megalithic structure", +"microphone, mike", +"microwave, microwave oven", +"military uniform", +"milk can", +"minibus", +"miniskirt, mini", +"minivan", +"missile", +"mitten", +"mixing bowl", +"mobile home, manufactured home", +"Model T", +"modem", +"monastery", +"monitor", +"moped", +"mortar", +"mortarboard", +"mosque", +"mosquito net", +"motor scooter, scooter", +"mountain bike, all-terrain bike, off-roader", +"mountain tent", +"mouse, computer mouse", +"mousetrap", +"moving van", +"muzzle", +"nail", +"neck brace", +"necklace", +"nipple", +"notebook, notebook computer", +"obelisk", +"oboe, hautboy, hautbois", +"ocarina, sweet potato", +"odometer, hodometer, mileometer, milometer", +"oil filter", +"organ, pipe organ", +"oscilloscope, scope, cathode-ray oscilloscope, CRO", +"overskirt", +"oxcart", +"oxygen mask", +"packet", +"paddle, boat paddle", +"paddlewheel, paddle wheel", +"padlock", +"paintbrush", +"pajama, pyjama, pj's, jammies", 
+"palace", +"panpipe, pandean pipe, syrinx", +"paper towel", +"parachute, chute", +"parallel bars, bars", +"park bench", +"parking meter", +"passenger car, coach, carriage", +"patio, terrace", +"pay-phone, pay-station", +"pedestal, plinth, footstall", +"pencil box, pencil case", +"pencil sharpener", +"perfume, essence", +"Petri dish", +"photocopier", +"pick, plectrum, plectron", +"pickelhaube", +"picket fence, paling", +"pickup, pickup truck", +"pier", +"piggy bank, penny bank", +"pill bottle", +"pillow", +"ping-pong ball", +"pinwheel", +"pirate, pirate ship", +"pitcher, ewer", +"plane, carpenter's plane, woodworking plane", +"planetarium", +"plastic bag", +"plate rack", +"plow, plough", +"plunger, plumber's helper", +"Polaroid camera, Polaroid Land camera", +"pole", +"police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria", +"poncho", +"pool table, billiard table, snooker table", +"pop bottle, soda bottle", +"pot, flowerpot", +"potter's wheel", +"power drill", +"prayer rug, prayer mat", +"printer", +"prison, prison house", +"projectile, missile", +"projector", +"puck, hockey puck", +"punching bag, punch bag, punching ball, punchball", +"purse", +"quill, quill pen", +"quilt, comforter, comfort, puff", +"racer, race car, racing car", +"racket, racquet", +"radiator", +"radio, wireless", +"radio telescope, radio reflector", +"rain barrel", +"recreational vehicle, RV, R.V.", +"reel", +"reflex camera", +"refrigerator, icebox", +"remote control, remote", +"restaurant, eating house, eating place, eatery", +"revolver, six-gun, six-shooter", +"rifle", +"rocking chair, rocker", +"rotisserie", +"rubber eraser, rubber, pencil eraser", +"rugby ball", +"rule, ruler", +"running shoe", +"safe", +"safety pin", +"saltshaker, salt shaker", +"sandal", +"sarong", +"sax, saxophone", +"scabbard", +"scale, weighing machine", +"school bus", +"schooner", +"scoreboard", +"screen, CRT screen", +"screw", +"screwdriver", +"seat belt, seatbelt", +"sewing machine", +"shield, buckler", +"shoe shop, shoe-shop, shoe store", +"shoji", +"shopping basket", +"shopping cart", +"shovel", +"shower cap", +"shower curtain", +"ski", +"ski mask", +"sleeping bag", +"slide rule, slipstick", +"sliding door", +"slot, one-armed bandit", +"snorkel", +"snowmobile", +"snowplow, snowplough", +"soap dispenser", +"soccer ball", +"sock", +"solar dish, solar collector, solar furnace", +"sombrero", +"soup bowl", +"space bar", +"space heater", +"space shuttle", +"spatula", +"speedboat", +"spider web, spider's web", +"spindle", +"sports car, sport car", +"spotlight, spot", +"stage", +"steam locomotive", +"steel arch bridge", +"steel drum", +"stethoscope", +"stole", +"stone wall", +"stopwatch, stop watch", +"stove", +"strainer", +"streetcar, tram, tramcar, trolley, trolley car", +"stretcher", +"studio couch, day bed", +"stupa, tope", +"submarine, pigboat, sub, U-boat", +"suit, suit of clothes", +"sundial", +"sunglass", +"sunglasses, dark glasses, shades", +"sunscreen, sunblock, sun blocker", +"suspension bridge", +"swab, swob, mop", +"sweatshirt", +"swimming trunks, bathing trunks", +"swing", +"switch, electric switch, electrical switch", +"syringe", +"table lamp", +"tank, army tank, armored combat vehicle, armoured combat vehicle", +"tape player", +"teapot", +"teddy, teddy bear", +"television, television system", +"tennis ball", +"thatch, thatched roof", +"theater curtain, theatre curtain", +"thimble", +"thresher, thrasher, threshing machine", +"throne", +"tile roof", +"toaster", +"tobacco shop, tobacconist shop, tobacconist", 
+"toilet seat", +"torch", +"totem pole", +"tow truck, tow car, wrecker", +"toyshop", +"tractor", +"trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi", +"tray", +"trench coat", +"tricycle, trike, velocipede", +"trimaran", +"tripod", +"triumphal arch", +"trolleybus, trolley coach, trackless trolley", +"trombone", +"tub, vat", +"turnstile", +"typewriter keyboard", +"umbrella", +"unicycle, monocycle", +"upright, upright piano", +"vacuum, vacuum cleaner", +"vase", +"vault", +"velvet", +"vending machine", +"vestment", +"viaduct", +"violin, fiddle", +"volleyball", +"waffle iron", +"wall clock", +"wallet, billfold, notecase, pocketbook", +"wardrobe, closet, press", +"warplane, military plane", +"washbasin, handbasin, washbowl, lavabo, wash-hand basin", +"washer, automatic washer, washing machine", +"water bottle", +"water jug", +"water tower", +"whiskey jug", +"whistle", +"wig", +"window screen", +"window shade", +"Windsor tie", +"wine bottle", +"wing", +"wok", +"wooden spoon", +"wool, woolen, woollen", +"worm fence, snake fence, snake-rail fence, Virginia fence", +"wreck", +"yawl", +"yurt", +"web site, website, internet site, site", +"comic book", +"crossword puzzle, crossword", +"street sign", +"traffic light, traffic signal, stoplight", +"book jacket, dust cover, dust jacket, dust wrapper", +"menu", +"plate", +"guacamole", +"consomme", +"hot pot, hotpot", +"trifle", +"ice cream, icecream", +"ice lolly, lolly, lollipop, popsicle", +"French loaf", +"bagel, beigel", +"pretzel", +"cheeseburger", +"hotdog, hot dog, red hot", +"mashed potato", +"head cabbage", +"broccoli", +"cauliflower", +"zucchini, courgette", +"spaghetti squash", +"acorn squash", +"butternut squash", +"cucumber, cuke", +"artichoke, globe artichoke", +"bell pepper", +"cardoon", +"mushroom", +"Granny Smith", +"strawberry", +"orange", +"lemon", +"fig", +"pineapple, ananas", +"banana", +"jackfruit, jak, jack", +"custard apple", +"pomegranate", +"hay", +"carbonara", +"chocolate sauce, chocolate syrup", +"dough", +"meat loaf, meatloaf", +"pizza, pizza pie", +"potpie", +"burrito", +"red wine", +"espresso", +"cup", +"eggnog", +"alp", +"bubble", +"cliff, drop, drop-off", +"coral reef", +"geyser", +"lakeside, lakeshore", +"promontory, headland, head, foreland", +"sandbar, sand bar", +"seashore, coast, seacoast, sea-coast", +"valley, vale", +"volcano", +"ballplayer, baseball player", +"groom, bridegroom", +"scuba diver", +"rapeseed", +"daisy", +"yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum", +"corn", +"acorn", +"hip, rose hip, rosehip", +"buckeye, horse chestnut, conker", +"coral fungus", +"agaric", +"gyromitra", +"stinkhorn, carrion fungus", +"earthstar", +"hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa", +"bolete", +"ear, spike, capitulum", +"toilet tissue, toilet paper, bathroom tissue" +] \ No newline at end of file diff --git a/notebooks/resnet18/resnet18.ipynb b/notebooks/resnet18/resnet18.ipynb new file mode 100644 index 00000000..39ab403e --- /dev/null +++ b/notebooks/resnet18/resnet18.ipynb @@ -0,0 +1,250 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, we create the pre-trained ImageNet model. We'll use ``resnet18`` from the torchvision package. Make sure to set the device to ``cuda``, since the inputs and parameter devices are inferred from model. Also make sure to set ``eval()`` to fix batch norm statistics." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torchvision\n", + "\n", + "model = torchvision.models.resnet18(pretrained=True).cuda().half().eval()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we create some sample input that will be used to infer the shape and data types of our TensorRT engine" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "\n", + "data = torch.randn((1, 3, 224, 224)).cuda().half()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, create the optimized TensorRT engine." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from torch2trt import torch2trt\n", + "\n", + "model_trt = torch2trt(model, [data], fp16_mode=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can execute the network like this" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "output_trt = model_trt(data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And check against the original output" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor([ 0.7231, 3.0195, 3.1016, 3.1152, 4.7539, 3.8301, 3.9180, 0.3086,\n", + " -0.8726, -0.2261], device='cuda:0', dtype=torch.float16,\n", + " grad_fn=)\n", + "tensor([ 0.7202, 3.0234, 3.1074, 3.1133, 4.7539, 3.8340, 3.9141, 0.3081,\n", + " -0.8716, -0.2227], device='cuda:0', dtype=torch.float16)\n", + "max error: 0.011719\n" + ] + } + ], + "source": [ + "output = model(data)\n", + "\n", + "print(output.flatten()[0:10])\n", + "print(output_trt.flatten()[0:10])\n", + "print('max error: %f' % float(torch.max(torch.abs(output - output_trt))))" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "\n", + "with open('imagenet_labels.json', 'r') as f:\n", + " labels = json.load(f)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "import cv2\n", + "import numpy as np\n", + "\n", + "device = torch.device('cuda')\n", + "mean = 255.0 * np.array([0.485, 0.456, 0.406])\n", + "stdev = 255.0 * np.array([0.229, 0.224, 0.225])\n", + "\n", + "normalize = torchvision.transforms.Normalize(mean, stdev)\n", + "\n", + "def preprocess(camera_value):\n", + " global device, normalize\n", + " x = camera_value\n", + " x = cv2.cvtColor(x, cv2.COLOR_BGR2RGB)\n", + " x = x.transpose((2, 0, 1))\n", + " x = torch.from_numpy(x).float()\n", + " x = normalize(x)\n", + " x = x.to(device)\n", + " x = x[None, ...]\n", + " return x" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "from jetbot import Camera\n", + "import ipywidgets\n", + "\n", + "camera = Camera(width=224, height=224)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "e53efc4ab70d4b19a622c944cf92d81b", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Image(value=b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00\\x00\\x01\\x00\\x01\\x00\\x00\\xff\\xdb\\x00C\\x00\\x02\\x01\\x0…" 
+ ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from jetbot import bgr8_to_jpeg\n", + "import traitlets\n", + "\n", + "image_w = ipywidgets.Image()\n", + "\n", + "traitlets.dlink((camera, 'value'), (image_w, 'value'), transform=bgr8_to_jpeg)\n", + "\n", + "display(image_w)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "152f1b979b2e46199daea08c14f9265b", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Textarea(value='')" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "text = ipywidgets.Textarea()\n", + "display(text)" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [], + "source": [ + "def execute(change):\n", + " image = change['new']\n", + " output = model_trt(preprocess(image).half()).detach().cpu().numpy().flatten()\n", + " idx = output.argmax()\n", + " text.value = labels[idx]\n", + "\n", + "execute({'new': camera.value})" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/torch2trt.py b/torch2trt.py index 29b3df1e..a21a24a5 100644 --- a/torch2trt.py +++ b/torch2trt.py @@ -2,6 +2,7 @@ import tensorrt as trt from copy import copy import numpy as np +import atexit # UTILITY FUNCTIONS @@ -50,6 +51,29 @@ def torch_device_from_trt(device): else: return TypeError('%s is not supported by torch' % device) + +def trt_input_names(count): + return ['input_%d' % i for i in range(count)] + + +def trt_output_names(count): + return ['output_%d' % i for i in range(count)] + + +def trt_num_inputs(engine): + count = 0 + for i in range(engine.num_bindings): + if engine.binding_is_input(i): + count += 1 + return count + + +def trt_num_outputs(engine): + count = 0 + for i in range(engine.num_bindings): + if not engine.binding_is_input(i): + count += 1 + return count # CONVERSION REGISTRY AND HOOKS @@ -123,7 +147,7 @@ def __exit__(self, type, val, tb): def add_inputs(self, torch_inputs, names=None): if names is None: - names = ['input_%d' % i for i in range(len(torch_inputs))] + names = trt_input_names(len(torch_inputs)) self.input_names = names for i, torch_input in enumerate(torch_inputs): @@ -138,7 +162,7 @@ def add_inputs(self, torch_inputs, names=None): def mark_outputs(self, torch_outputs, names=None): if names is None: - names = ['output_%d' % i for i in range(len(torch_outputs))] + names = trt_output_names(len(torch_outputs)) self.output_names = names for i, torch_output in enumerate(torch_outputs): @@ -150,14 +174,22 @@ def mark_outputs(self, torch_outputs, names=None): class TRTModule(torch.nn.Module): - def __init__(self, engine, input_names, output_names, final_shapes=None): - self.input_names = input_names - self.output_names = output_names + def __init__(self, engine, input_names=None, output_names=None, final_shapes=None): + super(TRTModule, self).__init__() + self._trt_engine = engine self._trt_context = self._trt_engine.create_execution_context() - super(TRTModule, self).__init__() + + self.input_names = input_names + if self.input_names is None: + 
self.input_names = trt_input_names(trt_num_inputs(self._trt_engine)) + + self.output_names = output_names + if self.output_names is None: + self.output_names = trt_output_names(trt_num_outputs(self._trt_engine)) + self.final_shapes = final_shapes - + def forward(self, *inputs): batch_size = inputs[0].shape[0] bindings = [None] * (len(self.input_names) + len(self.output_names)) From 4c9a1332e11210b4d38bbc954ba57de6947e06af Mon Sep 17 00:00:00 2001 From: John Welsh Date: Mon, 20 May 2019 12:00:48 -0700 Subject: [PATCH 052/355] removed example nb --- example.ipynb | 100 -------------------------------------------------- 1 file changed, 100 deletions(-) delete mode 100644 example.ipynb diff --git a/example.ipynb b/example.ipynb deleted file mode 100644 index 0e2a4f8a..00000000 --- a/example.ipynb +++ /dev/null @@ -1,100 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import torchvision.models\n", - "import torch\n", - "import tensorrt as trt\n", - "from torch2trt import torch2trt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model = torchvision.models.resnet18(pretrained=True).cuda().eval()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "input = torch.ones((1, 3, 224, 224)).cuda()\n", - "\n", - "model_trt = torch2trt(model, [input], fp16_mode=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import time\n", - "input = input.half()\n", - "\n", - "model = model.half()\n", - "output = model(input)\n", - "print(output.flatten()[0:10])\n", - "\n", - "t0 = time.time()\n", - "with torch.no_grad():\n", - " for i in range(50):\n", - " output = model(input)\n", - " #output = model_trt(input)\n", - "t1 = time.time()\n", - "\n", - "print((t1 - t0) / 50.0)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import time\n", - "input = input.float()\n", - "output = model_trt(input)\n", - "print(output.flatten()[0:10])\n", - "\n", - "t0 = time.time()\n", - "with torch.no_grad():\n", - " for i in range(50):\n", - " output = model_trt(input)\n", - "t1 = time.time()\n", - "\n", - "print((t1 - t0) / 50.0)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.7" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From e26d0f09f7a8405d65f8c29f697d27882210c9fc Mon Sep 17 00:00:00 2001 From: John Date: Mon, 20 May 2019 12:01:49 -0700 Subject: [PATCH 053/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9cf2782a..4f73ee4a 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ TensorRT Python API. The converter is * Easy to extend - Write your own layer converter in Python and register it with ``@tensorrt_converter`` -If you find an issue, please [let us know](../..//issues)! We'd also love to hear if you create your own ``@tensorrt_converter``. It may be helpful to others. +If you find an issue, please [let us know](../..//issues)! 
### Setup From 233a35c4c6c1d1efbe9c82419eebe8d616b93031 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Mon, 20 May 2019 15:24:30 -0700 Subject: [PATCH 054/355] added state_dict loading / saving --- torch2trt.py | 59 +++++++++++++++++++++++++++------------------------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/torch2trt.py b/torch2trt.py index a21a24a5..9fce64fd 100644 --- a/torch2trt.py +++ b/torch2trt.py @@ -50,14 +50,6 @@ def torch_device_from_trt(device): return torch.device('cpu') else: return TypeError('%s is not supported by torch' % device) - - -def trt_input_names(count): - return ['input_%d' % i for i in range(count)] - - -def trt_output_names(count): - return ['output_%d' % i for i in range(count)] def trt_num_inputs(engine): @@ -147,7 +139,7 @@ def __exit__(self, type, val, tb): def add_inputs(self, torch_inputs, names=None): if names is None: - names = trt_input_names(len(torch_inputs)) + names = ['input_%d' % i for i in range(len(torch_inputs))] self.input_names = names for i, torch_input in enumerate(torch_inputs): @@ -162,7 +154,7 @@ def add_inputs(self, torch_inputs, names=None): def mark_outputs(self, torch_outputs, names=None): if names is None: - names = trt_output_names(len(torch_outputs)) + names = ['output_%d' % i for i in range(len(torch_outputs))] self.output_names = names for i, torch_output in enumerate(torch_outputs): @@ -174,22 +166,33 @@ def mark_outputs(self, torch_outputs, names=None): class TRTModule(torch.nn.Module): - def __init__(self, engine, input_names=None, output_names=None, final_shapes=None): + def __init__(self, engine=None, input_names=None, output_names=None, final_shapes=None): super(TRTModule, self).__init__() - - self._trt_engine = engine - self._trt_context = self._trt_engine.create_execution_context() - + self._register_state_dict_hook(TRTModule._on_state_dict) + self.engine = engine + if self.engine is not None: + self.context = self.engine.create_execution_context() self.input_names = input_names - if self.input_names is None: - self.input_names = trt_input_names(trt_num_inputs(self._trt_engine)) - self.output_names = output_names - if self.output_names is None: - self.output_names = trt_output_names(trt_num_outputs(self._trt_engine)) - self.final_shapes = final_shapes + def _on_state_dict(self, state_dict, prefix, local_metadata): + state_dict[prefix + 'engine'] = bytes(self.engine.serialize()) + state_dict[prefix + 'input_names'] = self.input_names + state_dict[prefix + 'output_names'] = self.output_names + state_dict[prefix + 'final_shapes'] = self.final_shapes + + def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs): + engine_bytes = state_dict[prefix + 'engine'] + + with trt.Logger() as logger, trt.Runtime(logger) as runtime: + self.engine = runtime.deserialize_cuda_engine(engine_bytes) + self.context = self.engine.create_execution_context() + + self.input_names = state_dict[prefix + 'input_names'] + self.output_names = state_dict[prefix + 'output_names'] + self.final_shapes = state_dict[prefix + 'final_shapes'] + def forward(self, *inputs): batch_size = inputs[0].shape[0] bindings = [None] * (len(self.input_names) + len(self.output_names)) @@ -197,22 +200,22 @@ def forward(self, *inputs): # create output tensors outputs = [None] * len(self.output_names) for i, output_name in enumerate(self.output_names): - idx = self._trt_engine.get_binding_index(output_name) - dtype = torch_dtype_from_trt(self._trt_engine.get_binding_dtype(idx)) + idx = 
self.engine.get_binding_index(output_name) + dtype = torch_dtype_from_trt(self.engine.get_binding_dtype(idx)) if self.final_shapes is not None: shape = (batch_size, ) + self.final_shapes[i] else: - shape = (batch_size, ) + tuple(self._trt_engine.get_binding_shape(idx)) - device = torch_device_from_trt(self._trt_engine.get_location(idx)) + shape = (batch_size, ) + tuple(self.engine.get_binding_shape(idx)) + device = torch_device_from_trt(self.engine.get_location(idx)) output = torch.empty(size=shape, dtype=dtype, device=device) outputs[i] = output bindings[idx] = output.data_ptr() for i, input_name in enumerate(self.input_names): - idx = self._trt_engine.get_binding_index(input_name) + idx = self.engine.get_binding_index(input_name) bindings[idx] = inputs[i].data_ptr() - self._trt_context.execute_async(batch_size, bindings, torch.cuda.current_stream().cuda_stream) + self.context.execute_async(batch_size, bindings, torch.cuda.current_stream().cuda_stream) outputs = tuple(outputs) if len(outputs) == 1: @@ -245,7 +248,7 @@ def torch2trt(module, inputs, input_names=None, output_names=None, max_batch_siz builder.max_batch_size = max_batch_size engine = builder.build_cuda_engine(network) - + return TRTModule(engine, ctx.input_names, ctx.output_names, final_shapes) From 3dac9b8279acfb82a3f02ae1092c1378146c88ce Mon Sep 17 00:00:00 2001 From: John Date: Mon, 20 May 2019 15:58:04 -0700 Subject: [PATCH 055/355] Update README.md --- README.md | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4f73ee4a..503b93dd 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,10 @@ python setup.py install --user ### Usage +Below are some usage examples, for more check out the [notebooks](notebooks). + +#### Convert + ```python from torch2trt import torch2trt from torchvision.models.alexnet import alexnet @@ -31,15 +35,44 @@ x = torch.ones((1, 3, 224, 224)).cuda() model_trt = torch2trt(model, [x]) ``` -We can then test the output of the regular and TensorRT optimized models +#### Execute -``` +We can execute returned ``TRTModule`` just like the original PyTorch model + +```python y = model(x) y_trt = model_trt(x) +# check the output against print(torch.max(torch.abs(y - y_trt))) ``` +We can also execute on fixed output buffers + +```python +y = torch.empty((1, 1000)).cuda() + +model_trt.execute([x], [y]) +``` + +#### Save and load + +We can save the model as a ``state_dict``. + +```python +torch.save(model_trt.state_dict(), 'alexnet_trt.pth') +``` + +We can load the saved model into a ``TRTModule`` + +```python +from torch2trt import TRTModule + +model_trt = TRTModule() + +model_trt.load_state_dict(torch.load('alexnet_trt.pth')) +``` + ### Tested models Below are models that we benchmarked on NVIDIA Jetson Nano. Timing just includes model execution (not data copy). From 44cc54ce54aa391c4ccc40e881528f0501bab4df Mon Sep 17 00:00:00 2001 From: John Date: Mon, 20 May 2019 16:05:56 -0700 Subject: [PATCH 056/355] Update README.md --- README.md | 8 -------- 1 file changed, 8 deletions(-) diff --git a/README.md b/README.md index 503b93dd..199ceb1d 100644 --- a/README.md +++ b/README.md @@ -47,14 +47,6 @@ y_trt = model_trt(x) print(torch.max(torch.abs(y - y_trt))) ``` -We can also execute on fixed output buffers - -```python -y = torch.empty((1, 1000)).cuda() - -model_trt.execute([x], [y]) -``` - #### Save and load We can save the model as a ``state_dict``. 
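
The save/load workflow that patches 054-056 settle on can be sanity-checked end to end. The sketch below is illustrative and not part of the patch series; it assumes the ``torch2trt`` imports shown in the README and a hypothetical file name ``alexnet_trt.pth``, and simply verifies that a ``TRTModule`` reloaded from a ``state_dict`` reproduces the outputs of the freshly converted module.

```python
import torch
import torchvision
from torch2trt import torch2trt, TRTModule

# convert a model as in the README example
model = torchvision.models.alexnet(pretrained=True).cuda().eval()
x = torch.ones((1, 3, 224, 224)).cuda()
model_trt = torch2trt(model, [x])

# the _on_state_dict hook stores the serialized engine bytes together
# with the binding names, so a plain torch.save captures everything
sd = model_trt.state_dict()
print(list(sd.keys()))  # expect: ['engine', 'input_names', 'output_names', 'final_shapes']
torch.save(sd, 'alexnet_trt.pth')  # hypothetical path

# an empty TRTModule rebuilds its engine and execution context from the
# serialized bytes inside _load_from_state_dict
model_trt_loaded = TRTModule()
model_trt_loaded.load_state_dict(torch.load('alexnet_trt.pth'))

# both modules now execute the same engine, so the outputs should agree
print(torch.max(torch.abs(model_trt(x) - model_trt_loaded(x))))
```
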
From 54f15de1c8a6280ceef89e60b435f12821a62b60 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Mon, 20 May 2019 16:38:03 -0700 Subject: [PATCH 057/355] created package --- setup.py | 4 ++-- torch2trt/__init__.py | 1 + torch2trt.py => torch2trt/torch2trt.py | 0 3 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 torch2trt/__init__.py rename torch2trt.py => torch2trt/torch2trt.py (100%) diff --git a/setup.py b/setup.py index 93d09e83..e65ffb2d 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,8 @@ -from setuptools import setup +from setuptools import setup, find_packages setup( name='torch2trt', version='0.0', description='PyTorch to TensorRT converter', - py_modules=['torch2trt'], + packages=find_packages(), ) diff --git a/torch2trt/__init__.py b/torch2trt/__init__.py new file mode 100644 index 00000000..c8179b7e --- /dev/null +++ b/torch2trt/__init__.py @@ -0,0 +1 @@ +from .torch2trt import * \ No newline at end of file diff --git a/torch2trt.py b/torch2trt/torch2trt.py similarity index 100% rename from torch2trt.py rename to torch2trt/torch2trt.py From 4163d2abb124aa1da86a3524232abcecd5103241 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Wed, 29 May 2019 12:28:11 -0700 Subject: [PATCH 058/355] refactor + test --- torch2trt/__init__.py | 3 +- torch2trt/converters/AdaptiveAvgPool2d.py | 21 ++ torch2trt/converters/AvgPool2d.py | 26 ++ torch2trt/converters/BatchNorm2d.py | 16 ++ torch2trt/converters/Conv2d.py | 40 +++ torch2trt/converters/ConvTranspose2d.py | 40 +++ torch2trt/converters/Identity.py | 10 + torch2trt/converters/Linear.py | 16 ++ torch2trt/converters/LogSoftmax.py | 11 + torch2trt/converters/MaxPool2d.py | 27 ++ torch2trt/converters/ReLU.py | 10 + torch2trt/converters/ReLU6.py | 17 ++ torch2trt/converters/__init__.py | 17 ++ torch2trt/converters/adaptive_avg_pool2d.py | 8 + torch2trt/converters/add.py | 11 + torch2trt/converters/cat.py | 18 ++ torch2trt/converters/identity.py | 11 + torch2trt/converters/relu.py | 8 + torch2trt/converters/relu6.py | 8 + torch2trt/test.py | 60 ++++ torch2trt/torch2trt.py | 291 +------------------- 21 files changed, 380 insertions(+), 289 deletions(-) create mode 100644 torch2trt/converters/AdaptiveAvgPool2d.py create mode 100644 torch2trt/converters/AvgPool2d.py create mode 100644 torch2trt/converters/BatchNorm2d.py create mode 100644 torch2trt/converters/Conv2d.py create mode 100644 torch2trt/converters/ConvTranspose2d.py create mode 100644 torch2trt/converters/Identity.py create mode 100644 torch2trt/converters/Linear.py create mode 100644 torch2trt/converters/LogSoftmax.py create mode 100644 torch2trt/converters/MaxPool2d.py create mode 100644 torch2trt/converters/ReLU.py create mode 100644 torch2trt/converters/ReLU6.py create mode 100644 torch2trt/converters/__init__.py create mode 100644 torch2trt/converters/adaptive_avg_pool2d.py create mode 100644 torch2trt/converters/add.py create mode 100644 torch2trt/converters/cat.py create mode 100644 torch2trt/converters/identity.py create mode 100644 torch2trt/converters/relu.py create mode 100644 torch2trt/converters/relu6.py create mode 100644 torch2trt/test.py diff --git a/torch2trt/__init__.py b/torch2trt/__init__.py index c8179b7e..23eac8db 100644 --- a/torch2trt/__init__.py +++ b/torch2trt/__init__.py @@ -1 +1,2 @@ -from .torch2trt import * \ No newline at end of file +from .torch2trt import * +from .converters import * \ No newline at end of file diff --git a/torch2trt/converters/AdaptiveAvgPool2d.py b/torch2trt/converters/AdaptiveAvgPool2d.py new file mode 100644 index 
00000000..93a13f5d --- /dev/null +++ b/torch2trt/converters/AdaptiveAvgPool2d.py @@ -0,0 +1,21 @@ +from torch2trt.torch2trt import * + + +@tensorrt_converter('torch.nn.AdaptiveAvgPool2d.forward') +def convert_AdaptiveAvgPool2d(ctx): + module = ctx.method_args[0] + input = ctx.method_args[1] + output = ctx.method_return + + output_size = module.output_size + if not isinstance(output_size, tuple): + output_size = (output_size, ) * 2 + + stride = (input._trt.shape[-2] // output_size[-2], input._trt.shape[-1] // output_size[-1]) + + kernel_size = stride + layer = ctx.network.add_pooling( + input=input._trt, type=trt.PoolingType.AVERAGE, window_size=kernel_size) + layer.stride = stride + + output._trt = layer.get_output(0) \ No newline at end of file diff --git a/torch2trt/converters/AvgPool2d.py b/torch2trt/converters/AvgPool2d.py new file mode 100644 index 00000000..529ccad0 --- /dev/null +++ b/torch2trt/converters/AvgPool2d.py @@ -0,0 +1,26 @@ +from torch2trt.torch2trt import * + + +@tensorrt_converter('torch.nn.AvgPool2d.forward') +def convert_AvgPool2d(ctx): + module = ctx.method_args[0] + input = ctx.method_args[1] + output = ctx.method_return + + kernel_size = module.kernel_size + if not isinstance(kernel_size, tuple): + kernel_size = (kernel_size, ) * 2 + stride = module.stride + if not isinstance(stride, tuple): + stride = (stride, ) * 2 + padding = module.padding + if not isinstance(padding, tuple): + padding = (padding, ) * 2 + + layer = ctx.network.add_pooling( + input=input._trt, type=trt.PoolingType.AVERAGE, window_size=kernel_size) + layer.stride = stride + layer.padding = padding + layer.average_count_excludes_padding = not module.count_include_pad + + output._trt = layer.get_output(0) \ No newline at end of file diff --git a/torch2trt/converters/BatchNorm2d.py b/torch2trt/converters/BatchNorm2d.py new file mode 100644 index 00000000..66ab7c55 --- /dev/null +++ b/torch2trt/converters/BatchNorm2d.py @@ -0,0 +1,16 @@ +from torch2trt.torch2trt import * + + +@tensorrt_converter('torch.nn.BatchNorm2d.forward') +def convert_BatchNorm2d(ctx): + module = ctx.method_args[0] + input = ctx.method_args[1] + output = ctx.method_return + + scale = module.weight.detach().cpu().numpy() / np.sqrt(module.running_var.detach().cpu().numpy() + module.eps) + bias = module.bias.detach().cpu().numpy() - module.running_mean.detach().cpu().numpy() * scale + power = np.ones_like(scale) + + layer = ctx.network.add_scale(input._trt, trt.ScaleMode.CHANNEL, bias, scale, power) + + output._trt = layer.get_output(0) \ No newline at end of file diff --git a/torch2trt/converters/Conv2d.py b/torch2trt/converters/Conv2d.py new file mode 100644 index 00000000..874ab98a --- /dev/null +++ b/torch2trt/converters/Conv2d.py @@ -0,0 +1,40 @@ +from torch2trt.torch2trt import * + + +@tensorrt_converter('torch.nn.Conv2d.forward') +def convert_Conv2d(ctx): + module = ctx.method_args[0] + input = ctx.method_args[1] + output = ctx.method_return + + kernel_size = module.kernel_size + if not isinstance(kernel_size, tuple): + kernel_size = (kernel_size, ) * 2 + + stride = module.stride + if not isinstance(stride, tuple): + stride = (stride, ) * 2 + + padding = module.padding + if not isinstance(padding, tuple): + padding = (padding, ) * 2 + + kernel = module.weight.detach().cpu().numpy() + + bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype)) + if module.bias is not None: + bias = module.bias.detach().cpu().numpy() + + layer = ctx.network.add_convolution( + input=input._trt, + num_output_maps=module.out_channels, + 
kernel_shape=kernel_size, + kernel=kernel, + bias=bias) + layer.stride = stride + layer.padding = padding + + if module.groups is not None: + layer.num_groups = module.groups + + output._trt = layer.get_output(0) \ No newline at end of file diff --git a/torch2trt/converters/ConvTranspose2d.py b/torch2trt/converters/ConvTranspose2d.py new file mode 100644 index 00000000..700dc232 --- /dev/null +++ b/torch2trt/converters/ConvTranspose2d.py @@ -0,0 +1,40 @@ +from torch2trt.torch2trt import * + + +@tensorrt_converter('torch.nn.ConvTranspose2d.forward') +def convert_ConvTranspose2d(ctx): + module = ctx.method_args[0] + input = ctx.method_args[1] + output = ctx.method_return + + kernel_size = module.kernel_size + if not isinstance(kernel_size, tuple): + kernel_size = (kernel_size, ) * 2 + + stride = module.stride + if not isinstance(stride, tuple): + stride = (stride, ) * 2 + + padding = module.padding + if not isinstance(padding, tuple): + padding = (padding, ) * 2 + + kernel = module.weight.detach().cpu().numpy() + + bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype)) + if module.bias is not None: + bias = module.bias.detach().cpu().numpy() + + layer = ctx.network.add_deconvolution( + input=input._trt, + num_output_maps=module.out_channels, + kernel_shape=kernel_size, + kernel=kernel, + bias=bias) + layer.stride = stride + layer.padding = padding + + if module.groups is not None: + layer.num_groups = module.groups + + output._trt = layer.get_output(0) \ No newline at end of file diff --git a/torch2trt/converters/Identity.py b/torch2trt/converters/Identity.py new file mode 100644 index 00000000..0cdab1f1 --- /dev/null +++ b/torch2trt/converters/Identity.py @@ -0,0 +1,10 @@ +from torch2trt.torch2trt import * + + +@tensorrt_converter('torch.nn.Dropout.forward') +@tensorrt_converter('torch.nn.Dropout2d.forward') +@tensorrt_converter('torch.nn.Dropout3d.forward') +def convert_Identity(ctx): + input = ctx.method_args[1] + output = ctx.method_return + output._trt = input._trt \ No newline at end of file diff --git a/torch2trt/converters/Linear.py b/torch2trt/converters/Linear.py new file mode 100644 index 00000000..538498a2 --- /dev/null +++ b/torch2trt/converters/Linear.py @@ -0,0 +1,16 @@ +from torch2trt.torch2trt import * + + +@tensorrt_converter('torch.nn.Linear.forward') +def convert_Linear(ctx): + module = ctx.method_args[0] + input = ctx.method_args[1] + output = ctx.method_return + + layer = ctx.network.add_fully_connected( + input=input._trt, + num_outputs=module.out_features, + kernel=module.weight.detach().cpu().numpy(), + bias=module.bias.detach().cpu().numpy()) + + output._trt = layer.get_output(0) \ No newline at end of file diff --git a/torch2trt/converters/LogSoftmax.py b/torch2trt/converters/LogSoftmax.py new file mode 100644 index 00000000..065ed658 --- /dev/null +++ b/torch2trt/converters/LogSoftmax.py @@ -0,0 +1,11 @@ +from torch2trt.torch2trt import * + + +@tensorrt_converter('torch.nn.LogSoftmax.forward') +def convert_LogSoftmax(ctx): + input = ctx.method_args[1] + output = ctx.method_return + layer = ctx.network.add_softmax(input=input._trt) + layer = ctx.network.add_unary(input=layer.get_output(0), + op=trt.UnaryOperation.LOG) + output._trt = layer.get_output(0) \ No newline at end of file diff --git a/torch2trt/converters/MaxPool2d.py b/torch2trt/converters/MaxPool2d.py new file mode 100644 index 00000000..4b6e42da --- /dev/null +++ b/torch2trt/converters/MaxPool2d.py @@ -0,0 +1,27 @@ +from torch2trt.torch2trt import * + + 
+@tensorrt_converter('torch.nn.MaxPool2d.forward') +def convert_MaxPool2d(ctx): + module = ctx.method_args[0] + input = ctx.method_args[1] + output = ctx.method_return + + kernel_size = module.kernel_size + if not isinstance(kernel_size, tuple): + kernel_size = (kernel_size, ) * 2 + + stride = module.stride + if not isinstance(stride, tuple): + stride = (stride, ) * 2 + + padding = module.padding + if not isinstance(padding, tuple): + padding = (padding, ) * 2 + + layer = ctx.network.add_pooling( + input=input._trt, type=trt.PoolingType.MAX, window_size=kernel_size) + layer.stride = stride + layer.padding = padding + + output._trt = layer.get_output(0) \ No newline at end of file diff --git a/torch2trt/converters/ReLU.py b/torch2trt/converters/ReLU.py new file mode 100644 index 00000000..ffa9d4ca --- /dev/null +++ b/torch2trt/converters/ReLU.py @@ -0,0 +1,10 @@ +from torch2trt.torch2trt import * + + +@tensorrt_converter('torch.nn.ReLU.forward') +def convert_ReLU(ctx): + input = ctx.method_args[1] + output = ctx.method_return + layer = ctx.network.add_activation( + input=input._trt, type=trt.ActivationType.RELU) + output._trt = layer.get_output(0) \ No newline at end of file diff --git a/torch2trt/converters/ReLU6.py b/torch2trt/converters/ReLU6.py new file mode 100644 index 00000000..b2aacf20 --- /dev/null +++ b/torch2trt/converters/ReLU6.py @@ -0,0 +1,17 @@ +from torch2trt.torch2trt import * + + +@tensorrt_converter('torch.nn.ReLU6.forward') +def convert_ReLU6(ctx): + input = ctx.method_args[1] + output = ctx.method_return + + layer = ctx.network.add_activation( + input=input._trt, type=trt.ActivationType.RELU) + shape = (1, ) * len(input._trt.shape) # broadcast all dimensions + tensor = 6.0 * torch.ones(shape, dtype=torch_dtype_from_trt(input._trt.dtype)).cpu().numpy() + trt_6 = ctx.network.add_constant(shape, tensor) + layer = ctx.network.add_elementwise( + layer.get_output(0), trt_6.get_output(0), trt.ElementWiseOperation.MIN) + + output._trt = layer.get_output(0) \ No newline at end of file diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py new file mode 100644 index 00000000..65951dfe --- /dev/null +++ b/torch2trt/converters/__init__.py @@ -0,0 +1,17 @@ +from .adaptive_avg_pool2d import * +from .AdaptiveAvgPool2d import * +from .add import * +from .AvgPool2d import * +from .BatchNorm2d import * +from .cat import * +from .Conv2d import * +from .ConvTranspose2d import * +from .identity import * +from .Identity import * +from .Linear import * +from .LogSoftmax import * +from .MaxPool2d import * +from .relu import * +from .ReLU import * +from .relu6 import * +from .ReLU6 import * \ No newline at end of file diff --git a/torch2trt/converters/adaptive_avg_pool2d.py b/torch2trt/converters/adaptive_avg_pool2d.py new file mode 100644 index 00000000..30710eb8 --- /dev/null +++ b/torch2trt/converters/adaptive_avg_pool2d.py @@ -0,0 +1,8 @@ +from torch2trt.torch2trt import * +from .AdaptiveAvgPool2d import * + + +@tensorrt_converter('torch.nn.functional.adaptive_avg_pool2d') +def convert_adaptive_avg_pool2d(ctx): + ctx.method_args = (torch.nn.AdaptiveAvgPool2d(ctx.method_args[1]), ctx.method_args[0]) + convert_AdaptiveAvgPool2d(ctx) diff --git a/torch2trt/converters/add.py b/torch2trt/converters/add.py new file mode 100644 index 00000000..801e26c8 --- /dev/null +++ b/torch2trt/converters/add.py @@ -0,0 +1,11 @@ +from torch2trt.torch2trt import * + + +@tensorrt_converter('torch.Tensor.__iadd__') +@tensorrt_converter('torch.Tensor.__add__') +def convert_add(ctx): + 
input_a = ctx.method_args[0] + input_b = ctx.method_args[1] + output = ctx.method_return + layer = ctx.network.add_elementwise(input_a._trt, input_b._trt, trt.ElementWiseOperation.SUM) + output._trt = layer.get_output(0) \ No newline at end of file diff --git a/torch2trt/converters/cat.py b/torch2trt/converters/cat.py new file mode 100644 index 00000000..706dc032 --- /dev/null +++ b/torch2trt/converters/cat.py @@ -0,0 +1,18 @@ +from torch2trt.torch2trt import * + + +@tensorrt_converter('torch.cat') +def convert_cat(ctx): + inputs = ctx.method_args[0] + + if 'dim' in ctx.method_kwargs: + dim = ctx.method_kwargs['dim'] + else: + dim = ctx.method_args[1] + + output = ctx.method_return + trt_inputs = [i._trt for i in inputs] + + layer = ctx.network.add_concatenation(inputs=trt_inputs) + layer.axis = dim - 1 + output._trt = layer.get_output(0) \ No newline at end of file diff --git a/torch2trt/converters/identity.py b/torch2trt/converters/identity.py new file mode 100644 index 00000000..5a2de660 --- /dev/null +++ b/torch2trt/converters/identity.py @@ -0,0 +1,11 @@ +from torch2trt.torch2trt import * + + +@tensorrt_converter('torch.Tensor.view') +@tensorrt_converter('torch.nn.functional.dropout') +@tensorrt_converter('torch.nn.functional.dropout2d') +@tensorrt_converter('torch.nn.functional.dropout3d') +def convert_identity(ctx): + input = ctx.method_args[0] + output = ctx.method_return + output._trt = input._trt \ No newline at end of file diff --git a/torch2trt/converters/relu.py b/torch2trt/converters/relu.py new file mode 100644 index 00000000..ba3b5e0a --- /dev/null +++ b/torch2trt/converters/relu.py @@ -0,0 +1,8 @@ +from torch2trt.torch2trt import * +from .ReLU import * + + +@tensorrt_converter('torch.nn.functional.relu') +def convert_relu(ctx): + ctx.method_args = (torch.nn.ReLU(),) + ctx.method_args + convert_ReLU(ctx) \ No newline at end of file diff --git a/torch2trt/converters/relu6.py b/torch2trt/converters/relu6.py new file mode 100644 index 00000000..fc4e6ec0 --- /dev/null +++ b/torch2trt/converters/relu6.py @@ -0,0 +1,8 @@ +from torch2trt.torch2trt import * +from .ReLU6 import * + + +@tensorrt_converter('torch.nn.functional.relu6') +def convert_relu6(ctx): + ctx.method_args = (torch.nn.ReLU6(),) + ctx.method_args + convert_ReLU6(ctx) \ No newline at end of file diff --git a/torch2trt/test.py b/torch2trt/test.py new file mode 100644 index 00000000..90799b51 --- /dev/null +++ b/torch2trt/test.py @@ -0,0 +1,60 @@ +from torch2trt import * +import torchvision + + +class ModuleTest(object): + def __init__(self, module_fn, type, device, input_shapes, max_error=1e-2, **torch2trt_kwargs): + self.module_fn = module_fn + self.type = type + self.device = device + self.input_shapes = input_shapes + self.max_error = max_error + self.torch2trt_kwargs = torch2trt_kwargs + + def run(self): + # create module + module = self.module_fn() + module = module.to(self.device) + module = module.type(self.type) + module = module.eval() + + # create inputs + inputs = () + for shape in self.input_shapes: + inputs += (torch.ones(shape).to(self.device).type(self.type), ) + + # convert module + module_trt = torch2trt(module, inputs, **self.torch2trt_kwargs) + + # test output against original + outputs = module(*inputs) + outputs_trt = module_trt(*inputs) + + if not isinstance(outputs, tuple): + outputs = (outputs, ) + + for i in range(len(outputs)): + max_error = torch.max(torch.abs(outputs[i] - outputs_trt[i])) + if max_error > self.max_error: + raise RuntimeError('Output %d max error exceeded threshold of %f' % 
(i, self.max_error)) + + + +TESTS = { + 'resnet18_fp16': ModuleTest( + torchvision.models.resnet18, + torch.float16, + torch.device('cuda'), + [(1, 3, 224, 224)], + max_error=1e-2, + fp16_mode=True + ), +} + + +if __name__ == '__main__': + for name, test in TESTS.items(): + print('Testing %s ...' % name, end=" ") + test.run() + print('PASSED') + \ No newline at end of file diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index 9fce64fd..0e9ca223 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -2,7 +2,6 @@ import tensorrt as trt from copy import copy import numpy as np -import atexit # UTILITY FUNCTIONS @@ -224,9 +223,9 @@ def forward(self, *inputs): return outputs -def torch2trt(module, inputs, input_names=None, output_names=None, max_batch_size=1, +def torch2trt(module, inputs, input_names=None, output_names=None, log_level=trt.Logger.ERROR, max_batch_size=1, fp16_mode=False, max_workspace_size=0): - with trt.Logger(trt.Logger.INFO) as logger, trt.Builder(logger) as builder,\ + with trt.Logger(log_level) as logger, trt.Builder(logger) as builder,\ builder.create_network() as network, ConversionContext(network) as ctx: if isinstance(inputs, list): @@ -259,288 +258,4 @@ def tensorrt_converter(method): def register_converter(converter): CONVERTERS[method] = converter return converter - return register_converter - - -# MODULE CONVERTERS - - -@tensorrt_converter('torch.nn.Linear.forward') -def convert_Linear(ctx): - module = ctx.method_args[0] - input = ctx.method_args[1] - output = ctx.method_return - - layer = ctx.network.add_fully_connected( - input=input._trt, - num_outputs=module.out_features, - kernel=module.weight.detach().cpu().numpy(), - bias=module.bias.detach().cpu().numpy()) - - output._trt = layer.get_output(0) - - -@tensorrt_converter('torch.nn.Conv2d.forward') -def convert_Conv2d(ctx): - module = ctx.method_args[0] - input = ctx.method_args[1] - output = ctx.method_return - - kernel_size = module.kernel_size - if not isinstance(kernel_size, tuple): - kernel_size = (kernel_size, ) * 2 - - stride = module.stride - if not isinstance(stride, tuple): - stride = (stride, ) * 2 - - padding = module.padding - if not isinstance(padding, tuple): - padding = (padding, ) * 2 - - kernel = module.weight.detach().cpu().numpy() - - bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype)) - if module.bias is not None: - bias = module.bias.detach().cpu().numpy() - - layer = ctx.network.add_convolution( - input=input._trt, - num_output_maps=module.out_channels, - kernel_shape=kernel_size, - kernel=kernel, - bias=bias) - layer.stride = stride - layer.padding = padding - - if module.groups is not None: - layer.num_groups = module.groups - - output._trt = layer.get_output(0) - - -@tensorrt_converter('torch.nn.ConvTranspose2d.forward') -def convert_ConvTranspose2d(ctx): - module = ctx.method_args[0] - input = ctx.method_args[1] - output = ctx.method_return - - kernel_size = module.kernel_size - if not isinstance(kernel_size, tuple): - kernel_size = (kernel_size, ) * 2 - - stride = module.stride - if not isinstance(stride, tuple): - stride = (stride, ) * 2 - - padding = module.padding - if not isinstance(padding, tuple): - padding = (padding, ) * 2 - - kernel = module.weight.detach().cpu().numpy() - - bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype)) - if module.bias is not None: - bias = module.bias.detach().cpu().numpy() - - layer = ctx.network.add_deconvolution( - input=input._trt, - num_output_maps=module.out_channels, - kernel_shape=kernel_size, - 
kernel=kernel, - bias=bias) - layer.stride = stride - layer.padding = padding - - if module.groups is not None: - layer.num_groups = module.groups - - output._trt = layer.get_output(0) - - -@tensorrt_converter('torch.nn.MaxPool2d.forward') -def convert_MaxPool2d(ctx): - module = ctx.method_args[0] - input = ctx.method_args[1] - output = ctx.method_return - - kernel_size = module.kernel_size - if not isinstance(kernel_size, tuple): - kernel_size = (kernel_size, ) * 2 - - stride = module.stride - if not isinstance(stride, tuple): - stride = (stride, ) * 2 - - padding = module.padding - if not isinstance(padding, tuple): - padding = (padding, ) * 2 - - layer = ctx.network.add_pooling( - input=input._trt, type=trt.PoolingType.MAX, window_size=kernel_size) - layer.stride = stride - layer.padding = padding - - output._trt = layer.get_output(0) - - -@tensorrt_converter('torch.nn.AvgPool2d.forward') -def convert_AvgPool2d(ctx): - module = ctx.method_args[0] - input = ctx.method_args[1] - output = ctx.method_return - - kernel_size = module.kernel_size - if not isinstance(kernel_size, tuple): - kernel_size = (kernel_size, ) * 2 - stride = module.stride - if not isinstance(stride, tuple): - stride = (stride, ) * 2 - padding = module.padding - if not isinstance(padding, tuple): - padding = (padding, ) * 2 - - layer = ctx.network.add_pooling( - input=input._trt, type=trt.PoolingType.AVERAGE, window_size=kernel_size) - layer.stride = stride - layer.padding = padding - layer.average_count_excludes_padding = not module.count_include_pad - - output._trt = layer.get_output(0) - - -@tensorrt_converter('torch.nn.AdaptiveAvgPool2d.forward') -def convert_AdaptiveAvgPool2d(ctx): - module = ctx.method_args[0] - input = ctx.method_args[1] - output = ctx.method_return - - output_size = module.output_size - if not isinstance(output_size, tuple): - output_size = (output_size, ) * 2 - - stride = (input._trt.shape[-2] // output_size[-2], input._trt.shape[-1] // output_size[-1]) - - kernel_size = stride - layer = ctx.network.add_pooling( - input=input._trt, type=trt.PoolingType.AVERAGE, window_size=kernel_size) - layer.stride = stride - - output._trt = layer.get_output(0) - - -@tensorrt_converter('torch.nn.functional.adaptive_avg_pool2d') -def convert_adaptive_avg_pool2d(ctx): - ctx.method_args = (torch.nn.AdaptiveAvgPool2d(ctx.method_args[1]), ctx.method_args[0]) - convert_AdaptiveAvgPool2d(ctx) - - -@tensorrt_converter('torch.nn.ReLU.forward') -def convert_ReLU(ctx): - input = ctx.method_args[1] - output = ctx.method_return - layer = ctx.network.add_activation( - input=input._trt, type=trt.ActivationType.RELU) - output._trt = layer.get_output(0) - - -@tensorrt_converter('torch.nn.functional.relu') -def convert_relu(ctx): - ctx.method_args = (torch.nn.ReLU(),) + ctx.method_args - convert_ReLU(ctx) - - -@tensorrt_converter('torch.nn.ReLU6.forward') -def convert_ReLU6(ctx): - input = ctx.method_args[1] - output = ctx.method_return - - layer = ctx.network.add_activation( - input=input._trt, type=trt.ActivationType.RELU) - shape = (1, ) * len(input._trt.shape) # broadcast all dimensions - tensor = 6.0 * torch.ones(shape, dtype=torch_dtype_from_trt(input._trt.dtype)).cpu().numpy() - trt_6 = ctx.network.add_constant(shape, tensor) - layer = ctx.network.add_elementwise( - layer.get_output(0), trt_6.get_output(0), trt.ElementWiseOperation.MIN) - - output._trt = layer.get_output(0) - - -@tensorrt_converter('torch.nn.functional.relu6') -def convert_relu6(ctx): - ctx.method_args = (torch.nn.ReLU6(),) + ctx.method_args - 
convert_ReLU6(ctx) - - -@tensorrt_converter('torch.nn.LogSoftmax.forward') -def convert_LogSoftmax(ctx): - input = ctx.method_args[1] - output = ctx.method_return - layer = ctx.network.add_softmax(input=input._trt) - layer = ctx.network.add_unary(input=layer.get_output(0), - op=trt.UnaryOperation.LOG) - output._trt = layer.get_output(0) - - -@tensorrt_converter('torch.nn.Dropout.forward') -@tensorrt_converter('torch.nn.Dropout2d.forward') -@tensorrt_converter('torch.nn.Dropout3d.forward') -def convert_Identity(ctx): - input = ctx.method_args[1] - output = ctx.method_return - output._trt = input._trt - - -@tensorrt_converter('torch.Tensor.view') -@tensorrt_converter('torch.nn.functional.dropout') -@tensorrt_converter('torch.nn.functional.dropout2d') -@tensorrt_converter('torch.nn.functional.dropout3d') -def convert_identity(ctx): - input = ctx.method_args[0] - output = ctx.method_return - output._trt = input._trt - - -@tensorrt_converter('torch.nn.BatchNorm2d.forward') -def convert_BatchNorm2d(ctx): - module = ctx.method_args[0] - input = ctx.method_args[1] - output = ctx.method_return - - scale = module.weight.detach().cpu().numpy() / np.sqrt(module.running_var.detach().cpu().numpy() + module.eps) - bias = module.bias.detach().cpu().numpy() - module.running_mean.detach().cpu().numpy() * scale - power = np.ones_like(scale) - - layer = ctx.network.add_scale(input._trt, trt.ScaleMode.CHANNEL, bias, scale, power) - - output._trt = layer.get_output(0) - - -# TENSOR METHOD CONVERTERS - - -@tensorrt_converter('torch.cat') -def convert_cat(ctx): - inputs = ctx.method_args[0] - - if 'dim' in ctx.method_kwargs: - dim = ctx.method_kwargs['dim'] - else: - dim = ctx.method_args[1] - - output = ctx.method_return - trt_inputs = [i._trt for i in inputs] - - layer = ctx.network.add_concatenation(inputs=trt_inputs) - layer.axis = dim - 1 - output._trt = layer.get_output(0) - - -@tensorrt_converter('torch.Tensor.__iadd__') -@tensorrt_converter('torch.Tensor.__add__') -def convert_add(ctx): - input_a = ctx.method_args[0] - input_b = ctx.method_args[1] - output = ctx.method_return - layer = ctx.network.add_elementwise(input_a._trt, input_b._trt, trt.ElementWiseOperation.SUM) - output._trt = layer.get_output(0) \ No newline at end of file + return register_converter \ No newline at end of file From 2691a733deb262a7566bf079cce7af24db218d7d Mon Sep 17 00:00:00 2001 From: John Welsh Date: Wed, 29 May 2019 14:00:33 -0700 Subject: [PATCH 059/355] added test script --- torch2trt/test.py | 76 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 61 insertions(+), 15 deletions(-) diff --git a/torch2trt/test.py b/torch2trt/test.py index 90799b51..5a8a95a3 100644 --- a/torch2trt/test.py +++ b/torch2trt/test.py @@ -1,5 +1,7 @@ from torch2trt import * import torchvision +import time +import argparse class ModuleTest(object): @@ -33,28 +35,72 @@ def run(self): if not isinstance(outputs, tuple): outputs = (outputs, ) + # compute max error + max_error = 0 for i in range(len(outputs)): - max_error = torch.max(torch.abs(outputs[i] - outputs_trt[i])) - if max_error > self.max_error: - raise RuntimeError('Output %d max error exceeded threshold of %f' % (i, self.max_error)) + max_error_i = torch.max(torch.abs(outputs[i] - outputs_trt[i])) + if max_error_i > max_error: + max_error = max_error + + # benchmark pytorch + t0 = time.time() + for i in range(50): + outputs = module(*inputs) + t1 = time.time() + + fps = 50.0 / (t1 - t0) + + # benchmark tensorrt + t0 = time.time() + for i in range(50): + outputs = 
module_trt(*inputs) + t1 = time.time() + + fps_trt = 50.0 / (t1 - t0) + + return max_error, fps, fps_trt TESTS = { - 'resnet18_fp16': ModuleTest( - torchvision.models.resnet18, - torch.float16, - torch.device('cuda'), - [(1, 3, 224, 224)], - max_error=1e-2, - fp16_mode=True - ), + 'alexnet_fp16_3x224x224': ModuleTest(torchvision.models.alexnet, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), + 'squeezenet1_0_fp16_3x224x224': ModuleTest(torchvision.models.squeezenet1_0, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), + 'squeezenet1_1_fp16_3x224x224': ModuleTest(torchvision.models.squeezenet1_1, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), + 'resnet18_fp16_3x224x224': ModuleTest(torchvision.models.resnet18, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), + 'resnet34_fp16_3x224x224': ModuleTest(torchvision.models.resnet34, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), + 'resnet50_fp16_3x224x224': ModuleTest(torchvision.models.resnet50, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), + 'resnet101_fp16_3x224x224': ModuleTest(torchvision.models.resnet101, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), + 'resnet152_fp16_3x224x224': ModuleTest(torchvision.models.resnet152, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), + 'densenet121_fp16_3x224x224': ModuleTest(torchvision.models.densenet121, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), + 'densenet169_fp16_3x224x224': ModuleTest(torchvision.models.densenet169, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), + 'densenet201_fp16_3x224x224': ModuleTest(torchvision.models.densenet201, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), + 'densenet161_fp16_3x224x224': ModuleTest(torchvision.models.densenet161, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), + 'vgg11_fp16_3x224x224': ModuleTest(torchvision.models.vgg11, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), + 'vgg13_fp16_3x224x224': ModuleTest(torchvision.models.vgg13, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), + 'vgg16_fp16_3x224x224': ModuleTest(torchvision.models.vgg16, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), + 'vgg19_fp16_3x224x224': ModuleTest(torchvision.models.vgg19, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), + 'vgg11_bn_fp16_3x224x224': ModuleTest(torchvision.models.vgg11_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), + 'vgg13_bn_fp16_3x224x224': ModuleTest(torchvision.models.vgg13_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), + 'vgg16_bn_fp16_3x224x224': ModuleTest(torchvision.models.vgg16_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), + 'vgg19_bn_fp16_3x224x224': ModuleTest(torchvision.models.vgg19_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), } if __name__ == '__main__': + + parser = argparse.ArgumentParser() + 
parser.add_argument('--output', '-o', help='Test output file path', type=str, default='torch2trt_test.md') + args = parser.parse_args() + + print('| Name | Max Error | FPS (PyTorch) | FPS (TensorRT) |') + print('|------|-----------|---------------|----------------|') for name, test in TESTS.items(): - print('Testing %s ...' % name, end=" ") - test.run() - print('PASSED') - \ No newline at end of file + line = None + try: + max_error, fps, fps_trt = test.run() + line = '| %s | %.3g | %.3g | %.3g |' % (name, max_error, fps, fps_trt) + except: + line = '| %s | | | |' % name + print(line) + with open(args.output, 'a+') as f: + f.write(line + '\n') \ No newline at end of file From 121794517cdfc060fcfd4e549dcb0f4ffd2fd986 Mon Sep 17 00:00:00 2001 From: John Date: Wed, 29 May 2019 16:58:18 -0700 Subject: [PATCH 060/355] Update README.md --- README.md | 47 ++++++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 199ceb1d..5527721a 100644 --- a/README.md +++ b/README.md @@ -67,31 +67,28 @@ model_trt.load_state_dict(torch.load('alexnet_trt.pth')) ### Tested models -Below are models that we benchmarked on NVIDIA Jetson Nano. Timing just includes model execution (not data copy). - -| Model | PyTorch FP16 (Jetson Nano) | TensorRT FP16 (Jetson Nano) | -|-------|--------------|-----------------| -| alexnet | 18ms | 13ms | -| squeezenet1_0 | 21ms | 8.4ms | -| squeezenet1_1 | 13ms | 4.7ms | -| resnet18 | 32ms | 11ms | -| resnet34 | 58ms | 21ms | -| resnet50 | 77ms | 38ms | -| resnet101 | 135ms | 62ms | -| resnet152 | 200ms | 93ms | -| densenet121 | 83ms | 46ms | -| densenet169 | 116ms | 58ms | -| densenet201 | 139ms | 75ms | -| densenet161 | 209ms | 97ms | -| vgg11 | 61ms | 17ms | -| vgg13 | 96ms | 33ms | -| vgg16 | 137ms | 44ms | -| vgg19 | | | -| vgg11_bn | | | -| vgg13_bn | | | -| vgg16_bn | | | -| vgg19_bn | | | -| [mobilenet_v2](https://github.com/tonylins/pytorch-mobilenet-v2) | 27ms | 16ms | +Below are models that we benchmarked on NVIDIA Jetson Nano using [this script](torch2trt/test.py). Timing just includes model execution (not data copy). + +| Model | Max Error | FPS (PyTorch) | FPS (TensorRT) | +|------|-----------|---------------|----------------| +| alexnet_fp16_3x224x224 | 3.05e-05 | 91.7 | 58.5 | +| squeezenet1_0_fp16_3x224x224 | 0.00732 | 47.8 | 114 | +| squeezenet1_1_fp16_3x224x224 | 0.00781 | 71.7 | 264 | +| resnet18_fp16_3x224x224 | 0.00537 | 34.8 | 66.1 | +| resnet34_fp16_3x224x224 | 0.0938 | 17.7 | 38.6 | +| resnet50_fp16_3x224x224 | 0.123 | 13 | 27.7 | +| resnet101_fp16_3x224x224 | 0 | 5.56 | 15.1 | +| resnet152_fp16_3x224x224 | 0 | 5.01 | 10.8 | +| densenet121_fp16_3x224x224 | 0.00488 | 10.7 | 38.5 | +| densenet169_fp16_3x224x224 | 0.00488 | 8.02 | 31.2 | +| densenet201_fp16_3x224x224 | 0.00537 | 5.01 | 8.41 | +| densenet161_fp16_3x224x224 | 0.00635 | 4.67 | 11.9 | +| vgg11_fp16_3x224x224 | 0.00104 | 15 | 14.7 | +| vgg13_fp16_3x224x224 | 0.000504 | 10.5 | 11.8 | +| vgg16_fp16_3x224x224 | 0.000565 | 7.23 | 10.3 | +| vgg11_bn_fp16_3x224x224 | 0.000626 | 13.4 | 15.8 | +| vgg13_bn_fp16_3x224x224 | 0.000908 | 9.19 | 12.9 | +| vgg16_bn_fp16_3x224x224 | 0.00107 | 6.61 | 11 | ### How does it work? 
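The FPS figures in the table above come from the 50-iteration timing loop added to `torch2trt/test.py` in the previous patch. A minimal standalone sketch of the same measurement (assuming a CUDA device, `torchvision`, and the converter from this series are installed; the model choice here is just an illustration):

```python
# Minimal sketch mirroring the benchmark loop in torch2trt/test.py above;
# assumes a CUDA device and torchvision are available.
import time

import torch
import torchvision
from torch2trt import torch2trt

model = torchvision.models.alexnet().cuda().half().eval()
x = torch.ones((1, 3, 224, 224)).cuda().half()

# convert with the same kwargs used for the table entries
model_trt = torch2trt(model, [x], fp16_mode=True)

# max absolute error between the PyTorch and TensorRT outputs
max_error = torch.max(torch.abs(model(x) - model_trt(x)))

# FPS over 50 iterations, as in the harness
t0 = time.time()
for _ in range(50):
    y_trt = model_trt(x)
t1 = time.time()
print(max_error.item(), 50.0 / (t1 - t0))
```

Because the loop does not synchronize the CUDA stream before reading the clock, the figure should be treated as approximate.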
From dd6973fabad13ed73a4a67d5218f81c36026c8bf Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 30 May 2019 11:20:12 -0700 Subject: [PATCH 061/355] max error fix --- torch2trt/test.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/torch2trt/test.py b/torch2trt/test.py index 5a8a95a3..2061b5bb 100644 --- a/torch2trt/test.py +++ b/torch2trt/test.py @@ -2,6 +2,7 @@ import torchvision import time import argparse +import re class ModuleTest(object): @@ -40,7 +41,7 @@ def run(self): for i in range(len(outputs)): max_error_i = torch.max(torch.abs(outputs[i] - outputs_trt[i])) if max_error_i > max_error: - max_error = max_error + max_error = max_error_i # benchmark pytorch t0 = time.time() @@ -90,11 +91,14 @@ def run(self): parser = argparse.ArgumentParser() parser.add_argument('--output', '-o', help='Test output file path', type=str, default='torch2trt_test.md') + parser.add_argument('--filter', '-f', help='Regular expression applied to filter tests by name', type=str, default='.*') args = parser.parse_args() print('| Name | Max Error | FPS (PyTorch) | FPS (TensorRT) |') print('|------|-----------|---------------|----------------|') for name, test in TESTS.items(): + if not re.match(args.filter, name): + continue line = None try: max_error, fps, fps_trt = test.run() From 1c5a46055258145223d031aa6defd1137f5f9c38 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 30 May 2019 12:42:21 -0700 Subject: [PATCH 062/355] refactored tests --- torch2trt/test.py | 60 ++++++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/torch2trt/test.py b/torch2trt/test.py index 2061b5bb..85740106 100644 --- a/torch2trt/test.py +++ b/torch2trt/test.py @@ -6,14 +6,16 @@ class ModuleTest(object): - def __init__(self, module_fn, type, device, input_shapes, max_error=1e-2, **torch2trt_kwargs): + def __init__(self, module_fn, type, device, input_shapes, **torch2trt_kwargs): self.module_fn = module_fn self.type = type self.device = device self.input_shapes = input_shapes - self.max_error = max_error self.torch2trt_kwargs = torch2trt_kwargs + def module_name(self): + return self.module_fn.__module__ + '.' 
+ self.module_fn.__name__ + def run(self): # create module module = self.module_fn() @@ -61,43 +63,43 @@ def run(self): return max_error, fps, fps_trt - - -TESTS = { - 'alexnet_fp16_3x224x224': ModuleTest(torchvision.models.alexnet, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), - 'squeezenet1_0_fp16_3x224x224': ModuleTest(torchvision.models.squeezenet1_0, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), - 'squeezenet1_1_fp16_3x224x224': ModuleTest(torchvision.models.squeezenet1_1, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), - 'resnet18_fp16_3x224x224': ModuleTest(torchvision.models.resnet18, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), - 'resnet34_fp16_3x224x224': ModuleTest(torchvision.models.resnet34, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), - 'resnet50_fp16_3x224x224': ModuleTest(torchvision.models.resnet50, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), - 'resnet101_fp16_3x224x224': ModuleTest(torchvision.models.resnet101, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), - 'resnet152_fp16_3x224x224': ModuleTest(torchvision.models.resnet152, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), - 'densenet121_fp16_3x224x224': ModuleTest(torchvision.models.densenet121, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), - 'densenet169_fp16_3x224x224': ModuleTest(torchvision.models.densenet169, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), - 'densenet201_fp16_3x224x224': ModuleTest(torchvision.models.densenet201, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), - 'densenet161_fp16_3x224x224': ModuleTest(torchvision.models.densenet161, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), - 'vgg11_fp16_3x224x224': ModuleTest(torchvision.models.vgg11, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), - 'vgg13_fp16_3x224x224': ModuleTest(torchvision.models.vgg13, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), - 'vgg16_fp16_3x224x224': ModuleTest(torchvision.models.vgg16, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), - 'vgg19_fp16_3x224x224': ModuleTest(torchvision.models.vgg19, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), - 'vgg11_bn_fp16_3x224x224': ModuleTest(torchvision.models.vgg11_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), - 'vgg13_bn_fp16_3x224x224': ModuleTest(torchvision.models.vgg13_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), - 'vgg16_bn_fp16_3x224x224': ModuleTest(torchvision.models.vgg16_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), - 'vgg19_bn_fp16_3x224x224': ModuleTest(torchvision.models.vgg19_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], max_error=1e-2, fp16_mode=True), -} + +MODULE_TESTS = [ + ModuleTest(torchvision.models.alexnet, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.squeezenet1_0, 
torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.squeezenet1_1, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.resnet18, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.resnet34, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.resnet50, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.resnet101, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.resnet152, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.densenet121, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.densenet169, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.densenet201, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.densenet161, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.vgg11, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.vgg13, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.vgg16, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.vgg19, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.vgg11_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.vgg13_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.vgg16_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.vgg19_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), +] if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--output', '-o', help='Test output file path', type=str, default='torch2trt_test.md') - parser.add_argument('--filter', '-f', help='Regular expression applied to filter tests by name', type=str, default='.*') + parser.add_argument('--name', help='Regular expression to filter modules to test by name', type=str, default='.*') args = parser.parse_args() print('| Name | Max Error | FPS (PyTorch) | FPS (TensorRT) |') print('|------|-----------|---------------|----------------|') - for name, test in TESTS.items(): - if not re.match(args.filter, name): + for test in MODULE_TESTS: + name = test.module_name() + if not re.search(args.name, name): continue line = None try: From 7b42f16e4201c0e74d9ee7c16848ae60e99a5bf6 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 30 May 2019 13:36:50 -0700 Subject: [PATCH 063/355] update test.py --- torch2trt/test.py | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/torch2trt/test.py b/torch2trt/test.py index 85740106..87659290 100644 --- a/torch2trt/test.py +++ b/torch2trt/test.py @@ -6,9 +6,9 @@ class ModuleTest(object): - def __init__(self, module_fn, type, device, input_shapes, **torch2trt_kwargs): + def __init__(self, module_fn, dtype, device, input_shapes, **torch2trt_kwargs): self.module_fn 
= module_fn - self.type = type + self.dtype = dtype self.device = device self.input_shapes = input_shapes self.torch2trt_kwargs = torch2trt_kwargs @@ -20,13 +20,13 @@ def run(self): # create module module = self.module_fn() module = module.to(self.device) - module = module.type(self.type) + module = module.type(self.dtype) module = module.eval() # create inputs inputs = () for shape in self.input_shapes: - inputs += (torch.ones(shape).to(self.device).type(self.type), ) + inputs += (torch.ones(shape).to(self.device).type(self.dtype), ) # convert module module_trt = torch2trt(module, inputs, **self.torch2trt_kwargs) @@ -95,18 +95,27 @@ def run(self): parser.add_argument('--name', help='Regular expression to filter modules to test by name', type=str, default='.*') args = parser.parse_args() - print('| Name | Max Error | FPS (PyTorch) | FPS (TensorRT) |') - print('|------|-----------|---------------|----------------|') + # write header + line0 = '| Name | Data Type | Input Shapes | torch2trt kwargs | Max Error | FPS (PyTorch) | FPS (TensorRT) |' + line1 = '|------|-----------|--------------|------------------|-----------|---------------|----------------|' + print(line0) + print(line1) + with open(args.output, 'a+') as f: + f.write(line0 + '\n') + f.write(line1 + '\n') + for test in MODULE_TESTS: + + # filter by module name name = test.module_name() if not re.search(args.name, name): continue - line = None - try: - max_error, fps, fps_trt = test.run() - line = '| %s | %.3g | %.3g | %.3g |' % (name, max_error, fps, fps_trt) - except: - line = '| %s | | | |' % name + + # run test + max_error, fps, fps_trt = test.run() + + # write entry + line = '| %s | %s | %s | %s | %.2E | %.3g | %.3g |' % (name, test.dtype.__repr__().split('.')[-1], str(test.input_shapes), str(test.torch2trt_kwargs), max_error, fps, fps_trt) print(line) with open(args.output, 'a+') as f: f.write(line + '\n') \ No newline at end of file From 34f2fd7becfadfee47847a05fafa06df0734cbc9 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 30 May 2019 13:58:32 -0700 Subject: [PATCH 064/355] added test bash script (to handle killed processes) --- test.sh | 27 +++++++++++++++++++++++++++ torch2trt/test.py | 9 --------- 2 files changed, 27 insertions(+), 9 deletions(-) create mode 100644 test.sh diff --git a/test.sh b/test.sh new file mode 100644 index 00000000..8ea8ed45 --- /dev/null +++ b/test.sh @@ -0,0 +1,27 @@ +OUTPUT_FILE=$1 + +touch $OUTPUT_FILE + +echo "| Name | Data Type | Input Shapes | torch2trt kwargs | Max Error | FPS (PyTorch) | FPS (TensorRT) |" >> $OUTPUT_FILE +echo "|------|-----------|--------------|------------------|-----------|---------------|----------------|" >> $OUTPUT_FILE + +python3 -m torch2trt.test -o $OUTPUT_FILE --name alexnet +python3 -m torch2trt.test -o $OUTPUT_FILE --name squeezenet1_0 +python3 -m torch2trt.test -o $OUTPUT_FILE --name squeezenet1_1 +python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet18 +python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet34 +python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet50 +python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet101 +python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet152 +python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet121 +python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet169 +python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet201 +python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet161 +python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg11 +python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg13 +python3 -m 
torch2trt.test -o $OUTPUT_FILE --name vgg16 +python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg19 +python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg11_bn +python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg13_bn +python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg16_bn +python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg19_bn \ No newline at end of file diff --git a/torch2trt/test.py b/torch2trt/test.py index 87659290..30bb499a 100644 --- a/torch2trt/test.py +++ b/torch2trt/test.py @@ -94,15 +94,6 @@ def run(self): parser.add_argument('--output', '-o', help='Test output file path', type=str, default='torch2trt_test.md') parser.add_argument('--name', help='Regular expression to filter modules to test by name', type=str, default='.*') args = parser.parse_args() - - # write header - line0 = '| Name | Data Type | Input Shapes | torch2trt kwargs | Max Error | FPS (PyTorch) | FPS (TensorRT) |' - line1 = '|------|-----------|--------------|------------------|-----------|---------------|----------------|' - print(line0) - print(line1) - with open(args.output, 'a+') as f: - f.write(line0 + '\n') - f.write(line1 + '\n') for test in MODULE_TESTS: From 668063cd67bfb572bb00382801794fb03cf7f806 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Fri, 31 May 2019 17:28:06 -0400 Subject: [PATCH 065/355] fixed duplicate vgg test in test.sh --- test.sh | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/test.sh b/test.sh index 8ea8ed45..4aaebd4a 100644 --- a/test.sh +++ b/test.sh @@ -5,23 +5,23 @@ touch $OUTPUT_FILE echo "| Name | Data Type | Input Shapes | torch2trt kwargs | Max Error | FPS (PyTorch) | FPS (TensorRT) |" >> $OUTPUT_FILE echo "|------|-----------|--------------|------------------|-----------|---------------|----------------|" >> $OUTPUT_FILE -python3 -m torch2trt.test -o $OUTPUT_FILE --name alexnet -python3 -m torch2trt.test -o $OUTPUT_FILE --name squeezenet1_0 -python3 -m torch2trt.test -o $OUTPUT_FILE --name squeezenet1_1 -python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet18 -python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet34 -python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet50 -python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet101 -python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet152 -python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet121 -python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet169 -python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet201 -python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet161 -python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg11 -python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg13 -python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg16 -python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg19 -python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg11_bn -python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg13_bn -python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg16_bn -python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg19_bn \ No newline at end of file +#python3 -m torch2trt.test -o $OUTPUT_FILE --name alexnet +#python3 -m torch2trt.test -o $OUTPUT_FILE --name squeezenet1_0 +#python3 -m torch2trt.test -o $OUTPUT_FILE --name squeezenet1_1 +#python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet18 +#python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet34 +#python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet50 +#python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet101 +#python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet152 +#python3 -m 
torch2trt.test -o $OUTPUT_FILE --name densenet121
+#python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet169
+#python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet201
+#python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet161
+python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg11$
+python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg13$
+python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg16$
+python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg19$
+#python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg11_bn
+#python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg13_bn
+#python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg16_bn
+#python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg19_bn

From 92621a13f85dde066d72308ebe44f8d992d1e8d9 Mon Sep 17 00:00:00 2001
From: John
Date: Fri, 31 May 2019 15:07:24 -0700
Subject: [PATCH 066/355] Create BENCHMARKS.md

---
 BENCHMARKS.md | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 BENCHMARKS.md

diff --git a/BENCHMARKS.md b/BENCHMARKS.md
new file mode 100644
index 00000000..9d9f2ec1
--- /dev/null
+++ b/BENCHMARKS.md
@@ -0,0 +1,30 @@
+# Benchmarks
+
+This page contains various benchmark results on different platforms using [this script](test.sh). Currently, all benchmarks target batch size 1.
+
+## Jetson Nano
+
+## Jetson Xavier
+
+| Name | Data Type | Input Shapes | torch2trt kwargs | Max Error | FPS (PyTorch) | FPS (TensorRT) |
+|------|-----------|--------------|------------------|-----------|---------------|----------------|
+| torchvision.models.alexnet.alexnet | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.05E-05 | 354 | 560 |
+| torchvision.models.squeezenet.squeezenet1_0 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.56E-02 | 98.5 | 1.19e+03 |
+| torchvision.models.squeezenet.squeezenet1_1 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 7.32E-04 | 103 | 1.5e+03 |
+| torchvision.models.resnet.resnet18 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.93E-03 | 104 | 1.17e+03 |
+| torchvision.models.resnet.resnet34 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.25E-01 | 57.6 | 516 |
+| torchvision.models.resnet.resnet50 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 9.38E-02 | 42.1 | 358 |
+| torchvision.models.resnet.resnet101 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 24 | 185 |
+| torchvision.models.resnet.resnet152 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 17.7 | 127 |
+| torchvision.models.densenet.densenet121 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.66E-03 | 20.3 | 132 |
+| torchvision.models.densenet.densenet169 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.93E-03 | 15.1 | 120 |
+| torchvision.models.densenet.densenet201 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 5.37E-03 | 12.8 | 93.4 |
+| torchvision.models.densenet.densenet161 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 5.13E-03 | 16.2 | 85.3 |
+| torchvision.models.vgg.vgg11 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 6.10E-04 | 118 | 183 |
+| torchvision.models.vgg.vgg13 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 7.78E-04 | 93.8 | 161 |
+| torchvision.models.vgg.vgg16 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 8.77E-04 | 76.2 | 138 |
+| torchvision.models.vgg.vgg19 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 9.92E-04 | 63.9 | 123 |
+| torchvision.models.vgg.vgg11_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 6.41E-04 | 109 | 
190 | +| torchvision.models.vgg.vgg13_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 6.62E-04 | 86.3 | 163 | +| torchvision.models.vgg.vgg16_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.46E-03 | 70.3 | 142 | +| torchvision.models.vgg.vgg19_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 8.16E-04 | 59.4 | 128 | From 07c135a7aeb4653ccae492fb5fa840dfc0d76ba8 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Fri, 31 May 2019 18:03:58 -0400 Subject: [PATCH 067/355] uncommented tests --- test.sh | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/test.sh b/test.sh index 4aaebd4a..cfc1ccfa 100644 --- a/test.sh +++ b/test.sh @@ -5,23 +5,23 @@ touch $OUTPUT_FILE echo "| Name | Data Type | Input Shapes | torch2trt kwargs | Max Error | FPS (PyTorch) | FPS (TensorRT) |" >> $OUTPUT_FILE echo "|------|-----------|--------------|------------------|-----------|---------------|----------------|" >> $OUTPUT_FILE -#python3 -m torch2trt.test -o $OUTPUT_FILE --name alexnet -#python3 -m torch2trt.test -o $OUTPUT_FILE --name squeezenet1_0 -#python3 -m torch2trt.test -o $OUTPUT_FILE --name squeezenet1_1 -#python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet18 -#python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet34 -#python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet50 -#python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet101 -#python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet152 -#python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet121 -#python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet169 -#python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet201 -#python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet161 +python3 -m torch2trt.test -o $OUTPUT_FILE --name alexnet +python3 -m torch2trt.test -o $OUTPUT_FILE --name squeezenet1_0 +python3 -m torch2trt.test -o $OUTPUT_FILE --name squeezenet1_1 +python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet18 +python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet34 +python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet50 +python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet101 +python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet152 +python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet121 +python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet169 +python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet201 +python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet161 python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg11$ python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg13$ python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg16$ python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg19$ -#python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg11_bn -#python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg13_bn -#python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg16_bn -#python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg19_bn +python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg11_bn +python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg13_bn +python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg16_bn +python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg19_bn \ No newline at end of file From 42c05d6325378d84888db393e45cb47146e77025 Mon Sep 17 00:00:00 2001 From: John Date: Thu, 13 Jun 2019 11:17:45 -0700 Subject: [PATCH 068/355] Update README.md --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 5527721a..6b4eb82c 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,11 @@ TensorRT Python API. 
The converter is If you find an issue, please [let us know](../..//issues)! +> Please note, this converter has limited coverage of TensorRT / PyTorch. We've designed it for +> easy prototyping with the tested models below, which we use for tasks like collision avoidance and road +> following in the [JetBot](https://github.com/NVIDIA-AI-IOT/jetbot) project. If you find the converter +> helpful with other models, please [let us know](../..//issues). + ### Setup ```bash From b225b0be743dad96b410812b87808e57f18af062 Mon Sep 17 00:00:00 2001 From: John Date: Thu, 13 Jun 2019 11:19:48 -0700 Subject: [PATCH 069/355] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6b4eb82c..e6af768c 100644 --- a/README.md +++ b/README.md @@ -72,7 +72,8 @@ model_trt.load_state_dict(torch.load('alexnet_trt.pth')) ### Tested models -Below are models that we benchmarked on NVIDIA Jetson Nano using [this script](torch2trt/test.py). Timing just includes model execution (not data copy). +Below are models that we benchmarked on NVIDIA Jetson Nano using [this script](torch2trt/test.py). + | Model | Max Error | FPS (PyTorch) | FPS (TensorRT) | |------|-----------|---------------|----------------| From 506bf5aa186cdd01eebe4779dd9f2778e2cf6c5d Mon Sep 17 00:00:00 2001 From: John Date: Thu, 13 Jun 2019 11:22:07 -0700 Subject: [PATCH 070/355] Update README.md --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index e6af768c..12c9359c 100644 --- a/README.md +++ b/README.md @@ -143,5 +143,4 @@ Please see the ``torch2trt.py`` module for more examples. TensorRT currently does not support variable size Tensors, so whatever input shape you use when converting, you must use when executing. While this may seem limiting, it can actually be a good constraint when designing your model for use in embedded systems. By -restricting to a fixed input size, we can expect similar memory usage and runtime. Ultimately, even if -TensorRT didn't have this constraint, you'd probably want to have it anyways :) +restricting to a fixed input size, we can expect similar memory usage and runtime. From e515807198d01ddefbdc759b0e8fae2703fa2723 Mon Sep 17 00:00:00 2001 From: John Date: Thu, 13 Jun 2019 14:27:12 -0700 Subject: [PATCH 071/355] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 12c9359c..d9d3bcb5 100644 --- a/README.md +++ b/README.md @@ -9,8 +9,8 @@ TensorRT Python API. The converter is If you find an issue, please [let us know](../..//issues)! -> Please note, this converter has limited coverage of TensorRT / PyTorch. We've designed it for -> easy prototyping with the tested models below, which we use for tasks like collision avoidance and road +> Please note, this converter has limited coverage of TensorRT / PyTorch. We created it +> to easily optimize the models listed below. We use these models for tasks like collision avoidance and road > following in the [JetBot](https://github.com/NVIDIA-AI-IOT/jetbot) project. If you find the converter > helpful with other models, please [let us know](../..//issues). 
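One practical consequence of the note above: when a model uses a layer the converter does not cover, support can be added by registering a converter with the `@tensorrt_converter` decorator from earlier in this series. The sketch below is hypothetical (`torch.nn.Sigmoid` support is not part of the patches above) and follows the same pattern as the ReLU converter:

```python
# Hypothetical example: registering a converter for torch.nn.Sigmoid,
# following the pattern of the ReLU converter in this series.
from torch2trt.torch2trt import *


@tensorrt_converter('torch.nn.Sigmoid.forward')
def convert_Sigmoid(ctx):
    # ctx.method_args[0] is the module; ctx.method_args[1] is the input tensor
    input = ctx.method_args[1]
    output = ctx.method_return
    # add the equivalent TensorRT activation layer to the network being built
    layer = ctx.network.add_activation(
        input=input._trt, type=trt.ActivationType.SIGMOID)
    output._trt = layer.get_output(0)
```

Registering the function under the qualified method name is enough; the conversion context intercepts the call and records the matching TensorRT layer.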
From 224477e46ea60de212da36f3f33beee516dd6ae9 Mon Sep 17 00:00:00 2001 From: John Date: Thu, 13 Jun 2019 14:38:17 -0700 Subject: [PATCH 072/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d9d3bcb5..f2212465 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ TensorRT Python API. The converter is If you find an issue, please [let us know](../..//issues)! > Please note, this converter has limited coverage of TensorRT / PyTorch. We created it -> to easily optimize the models listed below. We use these models for tasks like collision avoidance and road +> to easily prototype applications using the models listed below. We use these models for tasks like collision avoidance and road > following in the [JetBot](https://github.com/NVIDIA-AI-IOT/jetbot) project. If you find the converter > helpful with other models, please [let us know](../..//issues). From 3f0a2393b22299e12a18d6e019f6400854ae65cb Mon Sep 17 00:00:00 2001 From: John Welsh Date: Mon, 29 Jan 2018 08:44:41 -0800 Subject: [PATCH 073/355] conversion / live demo notebooks split --- .../{resnet18.ipynb => conversion.ipynb} | 105 ++-------- notebooks/resnet18/live_demo.ipynb | 186 ++++++++++++++++++ 2 files changed, 199 insertions(+), 92 deletions(-) rename notebooks/resnet18/{resnet18.ipynb => conversion.ipynb} (56%) create mode 100644 notebooks/resnet18/live_demo.ipynb diff --git a/notebooks/resnet18/resnet18.ipynb b/notebooks/resnet18/conversion.ipynb similarity index 56% rename from notebooks/resnet18/resnet18.ipynb rename to notebooks/resnet18/conversion.ipynb index 39ab403e..7d84b088 100644 --- a/notebooks/resnet18/resnet18.ipynb +++ b/notebooks/resnet18/conversion.ipynb @@ -104,125 +104,46 @@ ] }, { - "cell_type": "code", - "execution_count": 15, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "import json\n", - "\n", - "with open('imagenet_labels.json', 'r') as f:\n", - " labels = json.load(f)" + "We can save the model like this" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "import cv2\n", - "import numpy as np\n", - "\n", - "device = torch.device('cuda')\n", - "mean = 255.0 * np.array([0.485, 0.456, 0.406])\n", - "stdev = 255.0 * np.array([0.229, 0.224, 0.225])\n", - "\n", - "normalize = torchvision.transforms.Normalize(mean, stdev)\n", - "\n", - "def preprocess(camera_value):\n", - " global device, normalize\n", - " x = camera_value\n", - " x = cv2.cvtColor(x, cv2.COLOR_BGR2RGB)\n", - " x = x.transpose((2, 0, 1))\n", - " x = torch.from_numpy(x).float()\n", - " x = normalize(x)\n", - " x = x.to(device)\n", - " x = x[None, ...]\n", - " return x" + "torch.save(model_trt.state_dict(), 'resnet18_trt.pth')" ] }, { - "cell_type": "code", - "execution_count": 17, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "from jetbot import Camera\n", - "import ipywidgets\n", - "\n", - "camera = Camera(width=224, height=224)" + "And load the model like this." 
] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "e53efc4ab70d4b19a622c944cf92d81b", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Image(value=b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00\\x00\\x01\\x00\\x01\\x00\\x00\\xff\\xdb\\x00C\\x00\\x02\\x01\\x0…" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ - "from jetbot import bgr8_to_jpeg\n", - "import traitlets\n", + "from torch2trt import TRTModule\n", "\n", - "image_w = ipywidgets.Image()\n", + "model_trt = TRTModule()\n", "\n", - "traitlets.dlink((camera, 'value'), (image_w, 'value'), transform=bgr8_to_jpeg)\n", - "\n", - "display(image_w)" + "model_trt.load_state_dict(torch.load('resnet18_trt.pth'))" ] }, { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "152f1b979b2e46199daea08c14f9265b", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Textarea(value='')" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "text = ipywidgets.Textarea()\n", - "display(text)" - ] - }, - { - "cell_type": "code", - "execution_count": 78, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "def execute(change):\n", - " image = change['new']\n", - " output = model_trt(preprocess(image).half()).detach().cpu().numpy().flatten()\n", - " idx = output.argmax()\n", - " text.value = labels[idx]\n", - "\n", - "execute({'new': camera.value})" + "That's it for this notebook! Try out the live demo to see real-time classification on a video feed." ] } ], diff --git a/notebooks/resnet18/live_demo.ipynb b/notebooks/resnet18/live_demo.ipynb new file mode 100644 index 00000000..08e11431 --- /dev/null +++ b/notebooks/resnet18/live_demo.ipynb @@ -0,0 +1,186 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook will run a live demo on Jetson Nano using [JetCam](https://github.com/NVIDIA-AI-IOT/jetcam) to acquire images from the camera. First,\n", + "let's start the camera. See the JetCam examples for details." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from jetcam.csi_camera import CSICamera\n", + "# from jetcam.usb_camera import USBCamera\n", + "\n", + "camera = CSICamera(width=224, height=224)\n", + "# camera = USBCamera(width=224, height=224)\n", + "\n", + "camera.running = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, let's connect the camera's value to a widget to display." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from jetcam.utils import bgr8_to_jpeg\n", + "import traitlets\n", + "import ipywidgetsb\n", + "\n", + "image_w = ipywidgets.Image()\n", + "\n", + "traitlets.dlink((camera, 'value'), (image_w, 'value'), transform=bgr8_to_jpeg)\n", + "\n", + "display(image_w)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we'll load the TensorRT model. 
(We assume you followed the conversion notebook and saved to the path ``resnet18_trt.pth``)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from torch2trt import TRTModule\n",
+    "\n",
+    "model_trt = TRTModule()\n",
+    "model_trt.load_state_dict(torch.load('resnet18_trt.pth'))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The following function will be used to pre-process images from the camera."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import cv2\n",
+    "import numpy as np\n",
+    "\n",
+    "device = torch.device('cuda')\n",
+    "mean = 255.0 * np.array([0.485, 0.456, 0.406])\n",
+    "stdev = 255.0 * np.array([0.229, 0.224, 0.225])\n",
+    "\n",
+    "normalize = torchvision.transforms.Normalize(mean, stdev)\n",
+    "\n",
+    "def preprocess(camera_value):\n",
+    "    global device, normalize\n",
+    "    x = camera_value\n",
+    "    x = cv2.cvtColor(x, cv2.COLOR_BGR2RGB)\n",
+    "    x = x.transpose((2, 0, 1))\n",
+    "    x = torch.from_numpy(x).float()\n",
+    "    x = normalize(x)\n",
+    "    x = x.to(device)\n",
+    "    x = x[None, ...]\n",
+    "    return x"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This text area will be used to display the class predictions."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "text = ipywidgets.Textarea()\n",
+    "display(text)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We load the imagenet labels to associate the neural network output with a class name."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "\n",
+    "with open('imagenet_labels.json', 'r') as f:\n",
+    "    labels = json.load(f)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Finally, we create our execution function, which we attach as a callback to the camera's ``value`` attribute.\n",
+    "\n",
+    "Whenever the camera's value is updated (which it will be for each frame, since we set ``camera.running = True``), this function will be called\n",
+    "with a ``change`` dictionary describing how the value changed. The new camera value will be stored in ``change['new']``."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def execute(change):\n", + " image = change['new']\n", + " output = model_trt(preprocess(image).half()).detach().cpu().numpy().flatten()\n", + " idx = output.argmax()\n", + " text.value = labels[idx]\n", + "\n", + "camera.observe(execute, names='value')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 8cd664c292aef173dbbcaccb3dd22ad8066dea58 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sat, 3 Feb 2018 04:20:13 -0800 Subject: [PATCH 074/355] added synchronization to test.py --- torch2trt/test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/torch2trt/test.py b/torch2trt/test.py index 30bb499a..4583bb89 100644 --- a/torch2trt/test.py +++ b/torch2trt/test.py @@ -49,6 +49,7 @@ def run(self): t0 = time.time() for i in range(50): outputs = module(*inputs) + torch.cuda.current_stream().synchronize() t1 = time.time() fps = 50.0 / (t1 - t0) @@ -57,6 +58,7 @@ def run(self): t0 = time.time() for i in range(50): outputs = module_trt(*inputs) + torch.cuda.current_stream().synchronize() t1 = time.time() fps_trt = 50.0 / (t1 - t0) From 4062d6ae329beedca500e086ae3596ac395022d0 Mon Sep 17 00:00:00 2001 From: John Date: Thu, 13 Jun 2019 14:51:15 -0700 Subject: [PATCH 075/355] Delete BENCHMARKS.md --- BENCHMARKS.md | 30 ------------------------------ 1 file changed, 30 deletions(-) delete mode 100644 BENCHMARKS.md diff --git a/BENCHMARKS.md b/BENCHMARKS.md deleted file mode 100644 index 9d9f2ec1..00000000 --- a/BENCHMARKS.md +++ /dev/null @@ -1,30 +0,0 @@ -# Benchmarks - -This page contains various benchmark results on different platforms using [this script](torch2trt/test.sh). Currently, all benchmarks target batch size 1. 
- -## Jetson Nano - -## Jetson Xavier - -| Name | Data Type | Input Shapes | torch2trt kwargs | Max Error | FPS (PyTorch) | FPS (TensorRT) | -|------|-----------|--------------|------------------|-----------|---------------|----------------| -| torchvision.models.alexnet.alexnet | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.05E-05 | 354 | 560 | -| torchvision.models.squeezenet.squeezenet1_0 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.56E-02 | 98.5 | 1.19e+03 | -| torchvision.models.squeezenet.squeezenet1_1 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 7.32E-04 | 103 | 1.5e+03 | -| torchvision.models.resnet.resnet18 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.93E-03 | 104 | 1.17e+03 | -| torchvision.models.resnet.resnet34 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.25E-01 | 57.6 | 516 | -| torchvision.models.resnet.resnet50 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 9.38E-02 | 42.1 | 358 | -| torchvision.models.resnet.resnet101 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 24 | 185 | -| torchvision.models.resnet.resnet152 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 17.7 | 127 | -| torchvision.models.densenet.densenet121 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.66E-03 | 20.3 | 132 | -| torchvision.models.densenet.densenet169 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.93E-03 | 15.1 | 120 | -| torchvision.models.densenet.densenet201 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 5.37E-03 | 12.8 | 93.4 | -| torchvision.models.densenet.densenet161 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 5.13E-03 | 16.2 | 85.3 | -| torchvision.models.vgg.vgg11 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 6.10E-04 | 118 | 183 | -| torchvision.models.vgg.vgg13 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 7.78E-04 | 93.8 | 161 | -| torchvision.models.vgg.vgg16 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 8.77E-04 | 76.2 | 138 | -| torchvision.models.vgg.vgg19 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 9.92E-04 | 63.9 | 123 | -| torchvision.models.vgg.vgg11_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 6.41E-04 | 109 | 190 | -| torchvision.models.vgg.vgg13_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 6.62E-04 | 86.3 | 163 | -| torchvision.models.vgg.vgg16_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.46E-03 | 70.3 | 142 | -| torchvision.models.vgg.vgg19_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 8.16E-04 | 59.4 | 128 | From c2724890d8cda2263339eb08938499c589191219 Mon Sep 17 00:00:00 2001 From: John Date: Thu, 13 Jun 2019 14:54:01 -0700 Subject: [PATCH 076/355] Update README.md --- README.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index f2212465..9b4a2303 100644 --- a/README.md +++ b/README.md @@ -9,10 +9,8 @@ TensorRT Python API. The converter is If you find an issue, please [let us know](../..//issues)! -> Please note, this converter has limited coverage of TensorRT / PyTorch. We created it -> to easily prototype applications using the models listed below. We use these models for tasks like collision avoidance and road -> following in the [JetBot](https://github.com/NVIDIA-AI-IOT/jetbot) project. If you find the converter -> helpful with other models, please [let us know](../..//issues). +> Please note, this converter has limited coverage of TensorRT / PyTorch. 
We created it primarily +> to easily optimize the models used in the [JetBot](https://github.com/NVIDIA-AI-IOT/jetbot) project. If you find the converter helpful with other models, please [let us know](../..//issues). ### Setup From a6c532ce135434844338f5209cf71b67e5beca4e Mon Sep 17 00:00:00 2001 From: John Date: Thu, 13 Jun 2019 15:03:16 -0700 Subject: [PATCH 077/355] Update README.md --- README.md | 47 ++++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index 9b4a2303..b0d54531 100644 --- a/README.md +++ b/README.md @@ -70,29 +70,30 @@ model_trt.load_state_dict(torch.load('alexnet_trt.pth')) ### Tested models -Below are models that we benchmarked on NVIDIA Jetson Nano using [this script](torch2trt/test.py). - - -| Model | Max Error | FPS (PyTorch) | FPS (TensorRT) | -|------|-----------|---------------|----------------| -| alexnet_fp16_3x224x224 | 3.05e-05 | 91.7 | 58.5 | -| squeezenet1_0_fp16_3x224x224 | 0.00732 | 47.8 | 114 | -| squeezenet1_1_fp16_3x224x224 | 0.00781 | 71.7 | 264 | -| resnet18_fp16_3x224x224 | 0.00537 | 34.8 | 66.1 | -| resnet34_fp16_3x224x224 | 0.0938 | 17.7 | 38.6 | -| resnet50_fp16_3x224x224 | 0.123 | 13 | 27.7 | -| resnet101_fp16_3x224x224 | 0 | 5.56 | 15.1 | -| resnet152_fp16_3x224x224 | 0 | 5.01 | 10.8 | -| densenet121_fp16_3x224x224 | 0.00488 | 10.7 | 38.5 | -| densenet169_fp16_3x224x224 | 0.00488 | 8.02 | 31.2 | -| densenet201_fp16_3x224x224 | 0.00537 | 5.01 | 8.41 | -| densenet161_fp16_3x224x224 | 0.00635 | 4.67 | 11.9 | -| vgg11_fp16_3x224x224 | 0.00104 | 15 | 14.7 | -| vgg13_fp16_3x224x224 | 0.000504 | 10.5 | 11.8 | -| vgg16_fp16_3x224x224 | 0.000565 | 7.23 | 10.3 | -| vgg11_bn_fp16_3x224x224 | 0.000626 | 13.4 | 15.8 | -| vgg13_bn_fp16_3x224x224 | 0.000908 | 9.19 | 12.9 | -| vgg16_bn_fp16_3x224x224 | 0.00107 | 6.61 | 11 | +We tested the converter against these models using [this script](torch2trt/test.py). + +| Model | Jetson Nano | Jetson Xavier | +|-------|-------------|---------------| +| alexnet | | | +| squeezenet1_0 | | | +| squeezenet1_1 | | | +| resnet18 | | | +| resnet34 | | | +| resnet50 | | | +| resnet101 | | | +| resnet152 | | | +| densenet121 | | | +| densenet169 | | | +| densenet201 | | | +| densenet161 | | | +| vgg11 | | | +| vgg13 | | | +| vgg16 | | | +| vgg19 | | | +| vgg11_bn | | | +| vgg13_bn | | | +| vgg16_bn | | | +| vgg19_bn | | | ### How does it work? 
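Before the raw numbers land in the commits that follow, it helps to restate how they are produced. The helper below is a condensed, hypothetical rewrite of the timing loops in ``torch2trt/test.py``; the ``measure`` name and the ``sync_every_call`` flag are invented for illustration, but the synchronize-then-time pattern is the one patch 074 above introduces and patch 107 below carries forward.

```python
import time
import torch


def measure(module, inputs, iterations=50, sync_every_call=False):
    # Throughput is measured with sync_every_call=False (one synchronize
    # after the loop); latency with sync_every_call=True (synchronize
    # after every call). test.py runs the two variants separately.
    torch.cuda.current_stream().synchronize()
    t0 = time.time()
    for _ in range(iterations):
        outputs = module(*inputs)
        if sync_every_call:
            torch.cuda.current_stream().synchronize()
    torch.cuda.current_stream().synchronize()
    t1 = time.time()
    return iterations / (t1 - t0), 1000.0 * (t1 - t0) / iterations


# usage sketch:
# fps, _ = measure(model_trt, (x,))
# _, ms = measure(model_trt, (x,), sync_every_call=True)
```

If the stream were never synchronized, ``time.time()`` would be read while CUDA kernels are still queued and the FPS figures would be inflated; that is what the synchronization added in patch 074 guards against.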
From 58671785f95b645e5cfe403dee3bb446410f7df9 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sat, 3 Feb 2018 05:38:26 -0800 Subject: [PATCH 078/355] added nano benchmarks --- BENCHMARKS_NANO.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 BENCHMARKS_NANO.md diff --git a/BENCHMARKS_NANO.md b/BENCHMARKS_NANO.md new file mode 100644 index 00000000..a89cc273 --- /dev/null +++ b/BENCHMARKS_NANO.md @@ -0,0 +1,20 @@ +| Name | Data Type | Input Shapes | torch2trt kwargs | Max Error | FPS (PyTorch) | FPS (TensorRT) | +|------|-----------|--------------|------------------|-----------|---------------|----------------| +| torchvision.models.alexnet.alexnet | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.91E-05 | 45.3 | 67.5 | +| torchvision.models.squeezenet.squeezenet1_0 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.24E-02 | 40.5 | 130 | +| torchvision.models.squeezenet.squeezenet1_1 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.46E-03 | 69.1 | 229 | +| torchvision.models.resnet.resnet18 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.93E-03 | 28.6 | 87.6 | +| torchvision.models.resnet.resnet34 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.56E-01 | 15.5 | 49.6 | +| torchvision.models.resnet.resnet50 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.91E-02 | 11.3 | 33.4 | +| torchvision.models.resnet.resnet101 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 7.05 | 19.7 | +| torchvision.models.resnet.resnet152 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 4.74 | 13.9 | +| torchvision.models.densenet.densenet121 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.38E-03 | 11.1 | 40.3 | +| torchvision.models.densenet.densenet169 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.93E-03 | 8.13 | 31.9 | +| torchvision.models.densenet.densenet201 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.44E-03 | 6.84 | 24.5 | +| torchvision.models.densenet.densenet161 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 4.64E-03 | 4.01 | 15.2 | +| torchvision.models.vgg.vgg11 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.81E-04 | 8.79 | 18 | +| torchvision.models.vgg.vgg13 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.36E-04 | 6.4 | 14.4 | +| torchvision.models.vgg.vgg16 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.83E-04 | 4.96 | 11.7 | +| torchvision.models.vgg.vgg11_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.66E-04 | 8.46 | 18.2 | +| torchvision.models.vgg.vgg13_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 4.43E-04 | 6.16 | 14.5 | +| torchvision.models.vgg.vgg16_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.37E-04 | 4.83 | 11.8 | From 391f796b2b7c623b5aba4fe1490dc439fb41aae3 Mon Sep 17 00:00:00 2001 From: John Date: Wed, 19 Jun 2019 13:03:00 -0700 Subject: [PATCH 079/355] Update README.md --- README.md | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index b0d54531..db3068bb 100644 --- a/README.md +++ b/README.md @@ -72,27 +72,27 @@ model_trt.load_state_dict(torch.load('alexnet_trt.pth')) We tested the converter against these models using [this script](torch2trt/test.py). 
-| Model | Jetson Nano | Jetson Xavier | -|-------|-------------|---------------| -| alexnet | | | -| squeezenet1_0 | | | -| squeezenet1_1 | | | -| resnet18 | | | -| resnet34 | | | -| resnet50 | | | -| resnet101 | | | -| resnet152 | | | -| densenet121 | | | -| densenet169 | | | -| densenet201 | | | -| densenet161 | | | -| vgg11 | | | -| vgg13 | | | -| vgg16 | | | +| Model | Nano (PyTorch) | Nano (TensorRT) | +|-------|:--------------:|:---------------:| +| alexnet | 45.3 | 67.5 | +| squeezenet1_0 | 40.5 | 130 | +| squeezenet1_1 | 69.1 | 229 | +| resnet18 | 28.6 | 87.6 | +| resnet34 | 15.5 | 49.6 | +| resnet50 | 11.3 | 33.4 | +| resnet101 | 7.05 | 19.7 | +| resnet152 | 4.74 | 13.9 | +| densenet121 | 11.1 | 40.3 | +| densenet169 | 8.13 | 31.9 | +| densenet201 | 6.84 | 24.5 | +| densenet161 | 4.01 | 15.2 | +| vgg11 | 8.79 | 18 | +| vgg13 | 6.4 | 14.4 | +| vgg16 | 4.96 | 11.7 | | vgg19 | | | -| vgg11_bn | | | -| vgg13_bn | | | -| vgg16_bn | | | +| vgg11_bn | 8.46 | 18.2 | +| vgg13_bn | 6.16 | 14.5 | +| vgg16_bn | 4.83 | 11.8 | | vgg19_bn | | | From 4fc880dda036315a600d9368403414132d0adf05 Mon Sep 17 00:00:00 2001 From: John Date: Wed, 19 Jun 2019 13:04:02 -0700 Subject: [PATCH 080/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index db3068bb..dd44a11c 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ model_trt = TRTModule() model_trt.load_state_dict(torch.load('alexnet_trt.pth')) ``` -### Tested models +### Models We tested the converter against these models using [this script](torch2trt/test.py). From b195c4f006b64828c2c5a4d233e04babff95092c Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sat, 3 Feb 2018 05:54:52 -0800 Subject: [PATCH 081/355] moved benchmarks --- BENCHMARKS_NANO.md => benchmarks/JETSON_NANO.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename BENCHMARKS_NANO.md => benchmarks/JETSON_NANO.md (100%) diff --git a/BENCHMARKS_NANO.md b/benchmarks/JETSON_NANO.md similarity index 100% rename from BENCHMARKS_NANO.md rename to benchmarks/JETSON_NANO.md From c2097840438bf76b690a898683082ce62541585e Mon Sep 17 00:00:00 2001 From: John Date: Wed, 19 Jun 2019 13:08:58 -0700 Subject: [PATCH 082/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index dd44a11c..b3b07d5b 100644 --- a/README.md +++ b/README.md @@ -70,7 +70,7 @@ model_trt.load_state_dict(torch.load('alexnet_trt.pth')) ### Models -We tested the converter against these models using [this script](torch2trt/test.py). +We tested the converter against these models using [this script](torch2trt/test.py). For more details, see the [raw output](benchmarks) from ``test.sh`` which calls [torch2trt/test.py](torch2trt/test.py). | Model | Nano (PyTorch) | Nano (TensorRT) | |-------|:--------------:|:---------------:| From 85bb49aadc0f6160ec18add3b6f9acb6d3a74983 Mon Sep 17 00:00:00 2001 From: John Date: Wed, 19 Jun 2019 13:30:33 -0700 Subject: [PATCH 083/355] Update README.md --- README.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b3b07d5b..19f5ad16 100644 --- a/README.md +++ b/README.md @@ -70,7 +70,16 @@ model_trt.load_state_dict(torch.load('alexnet_trt.pth')) ### Models -We tested the converter against these models using [this script](torch2trt/test.py). For more details, see the [raw output](benchmarks) from ``test.sh`` which calls [torch2trt/test.py](torch2trt/test.py). 
+We tested the converter against these models using the [test.sh](test.sh) script. You can generate the results by calling + +```bash +bash test.sh TEST_OUTPUT.md +``` + +Below shows the execution time in FPS of each model. You can find the raw output in the [benchmarks](benchmarks) folder. + +> Even though we report the results below in FPS, they are actually a measure of the model's *latency*. We use batch size 1 and perform [synchronization](https://github.com/NVIDIA-AI-IOT-private/torch2trt/blob/master/torch2trt/test.py#L61) after every model execution call. Higher *throughput* may be possible by asynchronous execution and increased batch size. + | Model | Nano (PyTorch) | Nano (TensorRT) | |-------|:--------------:|:---------------:| From 468012953a0a8d0c604c7faba39be1fd2584b9a6 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sun, 28 Jan 2018 12:01:58 -0500 Subject: [PATCH 084/355] xavier benchmarks --- benchmarks/JETSON_XAVIER.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 benchmarks/JETSON_XAVIER.md diff --git a/benchmarks/JETSON_XAVIER.md b/benchmarks/JETSON_XAVIER.md new file mode 100644 index 00000000..a02248cb --- /dev/null +++ b/benchmarks/JETSON_XAVIER.md @@ -0,0 +1,26 @@ +| Name | Data Type | Input Shapes | torch2trt kwargs | Max Error | Throughput (PyTorch) | Throughput (TensorRT) | Latency (PyTorch) | Latency (TensorRT) | +|------|-----------|--------------|------------------|-----------|----------------------|-----------------------|-------------------|--------------------| +| Name | Data Type | Input Shapes | torch2trt kwargs | Max Error | Throughput (PyTorch) | Throughput (TensorRT) | Latency (PyTorch) | Latency (TensorRT) | +|------|-----------|--------------|------------------|-----------|----------------------|-----------------------|-------------------|--------------------| +| Name | Data Type | Input Shapes | torch2trt kwargs | Max Error | Throughput (PyTorch) | Throughput (TensorRT) | Latency (PyTorch) | Latency (TensorRT) | +|------|-----------|--------------|------------------|-----------|----------------------|-----------------------|-------------------|--------------------| +| torchvision.models.alexnet.alexnet | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.29E-05 | 250 | 580 | 4.75 | 1.93 | +| torchvision.models.squeezenet.squeezenet1_0 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.03E-02 | 130 | 890 | 7.31 | 1.37 | +| torchvision.models.squeezenet.squeezenet1_1 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.95E-03 | 132 | 1.39e+03 | 7.41 | 0.951 | +| torchvision.models.resnet.resnet18 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 5.37E-03 | 140 | 712 | 7.1 | 1.64 | +| torchvision.models.resnet.resnet34 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.09E-01 | 79.2 | 393 | 12.6 | 2.79 | +| torchvision.models.resnet.resnet50 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 9.57E-02 | 55.5 | 312 | 17.6 | 3.48 | +| torchvision.models.resnet.resnet101 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 28.5 | 170 | 34.8 | 6.22 | +| torchvision.models.resnet.resnet152 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 18.9 | 121 | 52.1 | 8.58 | +| torchvision.models.densenet.densenet121 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.95E-03 | 23 | 168 | 43.3 | 6.37 | +| torchvision.models.densenet.densenet169 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 4.39E-03 | 16.3 | 118 | 60.2 | 8.83 | +| 
torchvision.models.densenet.densenet201 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 4.03E-03 | 13.3 | 90.9 | 72.7 | 11.4 | +| torchvision.models.densenet.densenet161 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.91E-03 | 17.2 | 82.4 | 56.3 | 12.6 | +| torchvision.models.vgg.vgg11 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 7.32E-04 | 85.2 | 201 | 12 | 5.16 | +| torchvision.models.vgg.vgg13 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 8.24E-04 | 71.9 | 166 | 14.2 | 6.27 | +| torchvision.models.vgg.vgg16 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.01E-03 | 61.7 | 139 | 16.6 | 7.46 | +| torchvision.models.vgg.vgg19 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.80E-03 | 54.1 | 121 | 18.8 | 8.52 | +| torchvision.models.vgg.vgg11_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 5.80E-04 | 81.8 | 201 | 12.5 | 5.16 | +| torchvision.models.vgg.vgg13_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 6.03E-04 | 68 | 166 | 15 | 6.27 | +| torchvision.models.vgg.vgg16_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.45E-03 | 58.5 | 140 | 17.4 | 7.41 | +| torchvision.models.vgg.vgg19_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 6.64E-04 | 51.4 | 121 | 19.8 | 8.52 | From 5eeddca78928c9b6f00f086975a4267e13a1f9eb Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sat, 3 Feb 2018 08:24:53 -0800 Subject: [PATCH 085/355] added jetson nano benchmarks --- benchmarks/JETSON_NANO.md | 42 ++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/benchmarks/JETSON_NANO.md b/benchmarks/JETSON_NANO.md index a89cc273..9efbed6c 100644 --- a/benchmarks/JETSON_NANO.md +++ b/benchmarks/JETSON_NANO.md @@ -1,20 +1,22 @@ -| Name | Data Type | Input Shapes | torch2trt kwargs | Max Error | FPS (PyTorch) | FPS (TensorRT) | -|------|-----------|--------------|------------------|-----------|---------------|----------------| -| torchvision.models.alexnet.alexnet | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.91E-05 | 45.3 | 67.5 | -| torchvision.models.squeezenet.squeezenet1_0 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.24E-02 | 40.5 | 130 | -| torchvision.models.squeezenet.squeezenet1_1 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.46E-03 | 69.1 | 229 | -| torchvision.models.resnet.resnet18 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.93E-03 | 28.6 | 87.6 | -| torchvision.models.resnet.resnet34 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.56E-01 | 15.5 | 49.6 | -| torchvision.models.resnet.resnet50 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.91E-02 | 11.3 | 33.4 | -| torchvision.models.resnet.resnet101 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 7.05 | 19.7 | -| torchvision.models.resnet.resnet152 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 4.74 | 13.9 | -| torchvision.models.densenet.densenet121 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.38E-03 | 11.1 | 40.3 | -| torchvision.models.densenet.densenet169 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.93E-03 | 8.13 | 31.9 | -| torchvision.models.densenet.densenet201 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.44E-03 | 6.84 | 24.5 | -| torchvision.models.densenet.densenet161 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 4.64E-03 | 4.01 | 15.2 | -| torchvision.models.vgg.vgg11 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.81E-04 | 8.79 | 18 | -| 
torchvision.models.vgg.vgg13 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.36E-04 | 6.4 | 14.4 | -| torchvision.models.vgg.vgg16 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.83E-04 | 4.96 | 11.7 | -| torchvision.models.vgg.vgg11_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.66E-04 | 8.46 | 18.2 | -| torchvision.models.vgg.vgg13_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 4.43E-04 | 6.16 | 14.5 | -| torchvision.models.vgg.vgg16_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.37E-04 | 4.83 | 11.8 | +| Name | Data Type | Input Shapes | torch2trt kwargs | Max Error | Throughput (PyTorch) | Throughput (TensorRT) | Latency (PyTorch) | Latency (TensorRT) | +|------|-----------|--------------|------------------|-----------|----------------------|-----------------------|-------------------|--------------------| +| Name | Data Type | Input Shapes | torch2trt kwargs | Max Error | Throughput (PyTorch) | Throughput (TensorRT) | Latency (PyTorch) | Latency (TensorRT) | +|------|-----------|--------------|------------------|-----------|----------------------|-----------------------|-------------------|--------------------| +| torchvision.models.alexnet.alexnet | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.29E-05 | 46.4 | 69.9 | 22.1 | 14.7 | +| torchvision.models.squeezenet.squeezenet1_0 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.20E-02 | 44 | 137 | 24.2 | 7.6 | +| torchvision.models.squeezenet.squeezenet1_1 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 9.77E-04 | 76.6 | 248 | 14 | 4.34 | +| torchvision.models.resnet.resnet18 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 5.86E-03 | 29.4 | 90.2 | 34.7 | 11.4 | +| torchvision.models.resnet.resnet34 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.56E-01 | 15.5 | 50.7 | 64.8 | 20.2 | +| torchvision.models.resnet.resnet50 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 6.45E-02 | 12.4 | 34.2 | 81.7 | 29.8 | +| torchvision.models.resnet.resnet101 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.01E+03 | 7.18 | 19.9 | 141 | 51.1 | +| torchvision.models.resnet.resnet152 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 4.96 | 14.1 | 204 | 72.3 | +| torchvision.models.densenet.densenet121 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.42E-03 | 11.5 | 41.9 | 84.5 | 24.8 | +| torchvision.models.densenet.densenet169 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 5.86E-03 | 8.25 | 33.2 | 118 | 31.2 | +| torchvision.models.densenet.densenet201 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.42E-03 | 6.84 | 25.4 | 141 | 40.8 | +| torchvision.models.densenet.densenet161 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 4.15E-03 | 4.71 | 15.6 | 247 | 65.8 | +| torchvision.models.vgg.vgg11 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.51E-04 | 8.9 | 18.3 | 114 | 55.1 | +| torchvision.models.vgg.vgg13 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.07E-04 | 6.53 | 14.7 | 156 | 68.7 | +| torchvision.models.vgg.vgg16 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 4.58E-04 | 5.09 | 11.9 | 201 | 85.1 | +| torchvision.models.vgg.vgg11_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.81E-04 | 8.74 | 18.4 | 117 | 54.8 | +| torchvision.models.vgg.vgg13_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 5.19E-04 | 6.31 | 14.8 | 162 | 68.5 | +| torchvision.models.vgg.vgg16_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 9.77E-04 | 4.96 | 12 | 207 | 84.3 
| From 985e1ff72a7872426dd6c574ef9b6aac8e69dfac Mon Sep 17 00:00:00 2001 From: John Date: Wed, 19 Jun 2019 14:09:27 -0700 Subject: [PATCH 086/355] Update JETSON_XAVIER.md --- benchmarks/JETSON_XAVIER.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/benchmarks/JETSON_XAVIER.md b/benchmarks/JETSON_XAVIER.md index a02248cb..fc532d57 100644 --- a/benchmarks/JETSON_XAVIER.md +++ b/benchmarks/JETSON_XAVIER.md @@ -1,9 +1,5 @@ | Name | Data Type | Input Shapes | torch2trt kwargs | Max Error | Throughput (PyTorch) | Throughput (TensorRT) | Latency (PyTorch) | Latency (TensorRT) | |------|-----------|--------------|------------------|-----------|----------------------|-----------------------|-------------------|--------------------| -| Name | Data Type | Input Shapes | torch2trt kwargs | Max Error | Throughput (PyTorch) | Throughput (TensorRT) | Latency (PyTorch) | Latency (TensorRT) | -|------|-----------|--------------|------------------|-----------|----------------------|-----------------------|-------------------|--------------------| -| Name | Data Type | Input Shapes | torch2trt kwargs | Max Error | Throughput (PyTorch) | Throughput (TensorRT) | Latency (PyTorch) | Latency (TensorRT) | -|------|-----------|--------------|------------------|-----------|----------------------|-----------------------|-------------------|--------------------| | torchvision.models.alexnet.alexnet | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.29E-05 | 250 | 580 | 4.75 | 1.93 | | torchvision.models.squeezenet.squeezenet1_0 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.03E-02 | 130 | 890 | 7.31 | 1.37 | | torchvision.models.squeezenet.squeezenet1_1 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.95E-03 | 132 | 1.39e+03 | 7.41 | 0.951 | From 2f969945729555093c144f90fcf9c9cc77696200 Mon Sep 17 00:00:00 2001 From: John Date: Wed, 19 Jun 2019 14:58:15 -0700 Subject: [PATCH 087/355] Update README.md --- README.md | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 19f5ad16..8c6f4085 100644 --- a/README.md +++ b/README.md @@ -81,28 +81,28 @@ Below shows the execution time in FPS of each model. You can find the raw outpu > Even though we report the results below in FPS, they are actually a measure of the model's *latency*. We use batch size 1 and perform [synchronization](https://github.com/NVIDIA-AI-IOT-private/torch2trt/blob/master/torch2trt/test.py#L61) after every model execution call. Higher *throughput* may be possible by asynchronous execution and increased batch size. 
-| Model | Nano (PyTorch) | Nano (TensorRT) | -|-------|:--------------:|:---------------:| -| alexnet | 45.3 | 67.5 | -| squeezenet1_0 | 40.5 | 130 | -| squeezenet1_1 | 69.1 | 229 | -| resnet18 | 28.6 | 87.6 | -| resnet34 | 15.5 | 49.6 | -| resnet50 | 11.3 | 33.4 | -| resnet101 | 7.05 | 19.7 | -| resnet152 | 4.74 | 13.9 | -| densenet121 | 11.1 | 40.3 | -| densenet169 | 8.13 | 31.9 | -| densenet201 | 6.84 | 24.5 | -| densenet161 | 4.01 | 15.2 | -| vgg11 | 8.79 | 18 | -| vgg13 | 6.4 | 14.4 | -| vgg16 | 4.96 | 11.7 | -| vgg19 | | | -| vgg11_bn | 8.46 | 18.2 | -| vgg13_bn | 6.16 | 14.5 | -| vgg16_bn | 4.83 | 11.8 | -| vgg19_bn | | | +| Model | Nano (PyTorch) | Nano (TensorRT) | Xavier (PyTorch) | Xavier (TensorRT) | +|-------|:--------------:|:---------------:|:----------------:|:-----------------:| +| alexnet | | | 250 | 580 | +| squeezenet1_0 | | | 130 | 890 | +| squeezenet1_1 | | | 132 | 1390 | +| resnet18 | | | 140 | 712 | +| resnet34 | | | 79.2 | 393 | +| resnet50 | | | 55.5 | 312 | +| resnet101 | | | 28.5 | 170 | +| resnet152 | | | 18.9 | 121 | +| densenet121 | | | 23.0 | 168 | +| densenet169 | | | 16.3 | 118 | +| densenet201 | | | 13.3 | 90.9 | +| densenet161 | | | 17.2 | 82.4 | +| vgg11 | | | 85.2 | 201 | +| vgg13 | | | 71.9 | 166 | +| vgg16 | | | 61.7 | 139 | +| vgg19 | | | 54.1 | 121 | +| vgg11_bn | | | 81.8 | 201 | +| vgg13_bn | | | 68.0 | 166 | +| vgg16_bn | | | 58.5 | 140 | +| vgg19_bn | | | 51.4 | 121 | ### How does it work? From 4889be78b9ab5d07c1623e7045b6c3011536c592 Mon Sep 17 00:00:00 2001 From: John Date: Wed, 19 Jun 2019 14:59:10 -0700 Subject: [PATCH 088/355] Update README.md --- README.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/README.md b/README.md index 8c6f4085..438c9fa7 100644 --- a/README.md +++ b/README.md @@ -76,10 +76,7 @@ We tested the converter against these models using the [test.sh](test.sh) script bash test.sh TEST_OUTPUT.md ``` -Below shows the execution time in FPS of each model. You can find the raw output in the [benchmarks](benchmarks) folder. - -> Even though we report the results below in FPS, they are actually a measure of the model's *latency*. We use batch size 1 and perform [synchronization](https://github.com/NVIDIA-AI-IOT-private/torch2trt/blob/master/torch2trt/test.py#L61) after every model execution call. Higher *throughput* may be possible by asynchronous execution and increased batch size. - +Below shows the throughput of each model in FPS. You can find the raw output (which includes latency) in the [benchmarks](benchmarks) folder. 
| Model | Nano (PyTorch) | Nano (TensorRT) | Xavier (PyTorch) | Xavier (TensorRT) | |-------|:--------------:|:---------------:|:----------------:|:-----------------:| From e1f26c412f491ddd94162dc5028817c163207ecd Mon Sep 17 00:00:00 2001 From: John Date: Wed, 19 Jun 2019 15:37:33 -0700 Subject: [PATCH 089/355] Update JETSON_NANO.md --- benchmarks/JETSON_NANO.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/benchmarks/JETSON_NANO.md b/benchmarks/JETSON_NANO.md index 9efbed6c..0dab2486 100644 --- a/benchmarks/JETSON_NANO.md +++ b/benchmarks/JETSON_NANO.md @@ -1,7 +1,5 @@ | Name | Data Type | Input Shapes | torch2trt kwargs | Max Error | Throughput (PyTorch) | Throughput (TensorRT) | Latency (PyTorch) | Latency (TensorRT) | |------|-----------|--------------|------------------|-----------|----------------------|-----------------------|-------------------|--------------------| -| Name | Data Type | Input Shapes | torch2trt kwargs | Max Error | Throughput (PyTorch) | Throughput (TensorRT) | Latency (PyTorch) | Latency (TensorRT) | -|------|-----------|--------------|------------------|-----------|----------------------|-----------------------|-------------------|--------------------| | torchvision.models.alexnet.alexnet | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.29E-05 | 46.4 | 69.9 | 22.1 | 14.7 | | torchvision.models.squeezenet.squeezenet1_0 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.20E-02 | 44 | 137 | 24.2 | 7.6 | | torchvision.models.squeezenet.squeezenet1_1 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 9.77E-04 | 76.6 | 248 | 14 | 4.34 | From 881908a9853d04cced310bd7dc31a1a596be9022 Mon Sep 17 00:00:00 2001 From: John Date: Wed, 19 Jun 2019 15:40:26 -0700 Subject: [PATCH 090/355] Update README.md --- README.md | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 438c9fa7..f691baae 100644 --- a/README.md +++ b/README.md @@ -80,25 +80,25 @@ Below shows the throughput of each model in FPS. 
You can find the raw output (w | Model | Nano (PyTorch) | Nano (TensorRT) | Xavier (PyTorch) | Xavier (TensorRT) | |-------|:--------------:|:---------------:|:----------------:|:-----------------:| -| alexnet | | | 250 | 580 | -| squeezenet1_0 | | | 130 | 890 | -| squeezenet1_1 | | | 132 | 1390 | -| resnet18 | | | 140 | 712 | -| resnet34 | | | 79.2 | 393 | -| resnet50 | | | 55.5 | 312 | -| resnet101 | | | 28.5 | 170 | -| resnet152 | | | 18.9 | 121 | -| densenet121 | | | 23.0 | 168 | -| densenet169 | | | 16.3 | 118 | -| densenet201 | | | 13.3 | 90.9 | -| densenet161 | | | 17.2 | 82.4 | -| vgg11 | | | 85.2 | 201 | -| vgg13 | | | 71.9 | 166 | -| vgg16 | | | 61.7 | 139 | +| alexnet | 46.4 | 69.9 | 250 | 580 | +| squeezenet1_0 | 44 | 137 | 130 | 890 | +| squeezenet1_1 | 76.6 | 248 | 132 | 1390 | +| resnet18 | 29.4 | 90.2 | 140 | 712 | +| resnet34 | 15.5 | 50.7 | 79.2 | 393 | +| resnet50 | 12.4 | 34.2 | 55.5 | 312 | +| resnet101 | 7.18 | 19.9 | 28.5 | 170 | +| resnet152 | 4.96 | 14.1 | 18.9 | 121 | +| densenet121 | 11.5 | 41.9 | 23.0 | 168 | +| densenet169 | 8.25 | 33.2 | 16.3 | 118 | +| densenet201 | 6.84 | 25.4 | 13.3 | 90.9 | +| densenet161 | 4.71 | 15.6 | 17.2 | 82.4 | +| vgg11 | 8.9 | 18.3 | 85.2 | 201 | +| vgg13 | 6.53 | 14.7 | 71.9 | 166 | +| vgg16 | 5.09 | 11.9 | 61.7 | 139 | | vgg19 | | | 54.1 | 121 | -| vgg11_bn | | | 81.8 | 201 | -| vgg13_bn | | | 68.0 | 166 | -| vgg16_bn | | | 58.5 | 140 | +| vgg11_bn | 8.74 | 18.4 | 81.8 | 201 | +| vgg13_bn | 6.31 | 14.8 | 68.0 | 166 | +| vgg16_bn | 4.96 | 12.0 | 58.5 | 140 | | vgg19_bn | | | 51.4 | 121 | From 5ae5bd67d2a7da952d7c576779f865a9e028404d Mon Sep 17 00:00:00 2001 From: John Date: Wed, 19 Jun 2019 19:02:41 -0700 Subject: [PATCH 091/355] Update README.md --- README.md | 7 ------- 1 file changed, 7 deletions(-) diff --git a/README.md b/README.md index f691baae..97c1978e 100644 --- a/README.md +++ b/README.md @@ -142,10 +142,3 @@ the following * ``ctx.method_return`` - The value returned by the specified PyTorch function. The converter must set the ``_trt`` attribute where relevant. Please see the ``torch2trt.py`` module for more examples. - -### A comment on variable size tensors - -TensorRT currently does not support variable size Tensors, so whatever input shape you use when converting, you must use -when executing. While this may seem -limiting, it can actually be a good constraint when designing your model for use in embedded systems. By -restricting to a fixed input size, we can expect similar memory usage and runtime. From 6a2896bd6340a66aab507d86383a6f6f46ed9a60 Mon Sep 17 00:00:00 2001 From: John Date: Wed, 19 Jun 2019 19:03:27 -0700 Subject: [PATCH 092/355] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 97c1978e..9e9d7d0c 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,8 @@ If you find an issue, please [let us know](../..//issues)! ### Setup ```bash +git clone https://github.com/NVIDIA-AI-IOT/torch2trt +cd torch2trt python setup.py install --user ``` From 256d0ac6c3a47b24897a11e8ada118c1dc7096c8 Mon Sep 17 00:00:00 2001 From: John Date: Wed, 19 Jun 2019 19:05:05 -0700 Subject: [PATCH 093/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9e9d7d0c..29f83f13 100644 --- a/README.md +++ b/README.md @@ -78,7 +78,7 @@ We tested the converter against these models using the [test.sh](test.sh) script bash test.sh TEST_OUTPUT.md ``` -Below shows the throughput of each model in FPS. 
You can find the raw output (which includes latency) in the [benchmarks](benchmarks) folder. +> The results below show the throughput in FPS. You can find the raw output, which includes latency, in the [benchmarks folder](benchmarks). | Model | Nano (PyTorch) | Nano (TensorRT) | Xavier (PyTorch) | Xavier (TensorRT) | |-------|:--------------:|:---------------:|:----------------:|:-----------------:| From 27a2edb87261d932ef9181d249660c73cad7f0c9 Mon Sep 17 00:00:00 2001 From: John Date: Thu, 20 Jun 2019 11:53:07 -0700 Subject: [PATCH 094/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 29f83f13..afccf6a6 100644 --- a/README.md +++ b/README.md @@ -143,4 +143,4 @@ the following * ``ctx.method_kwargs`` - Keyword arguments that were passed to the specified PyTorch function. * ``ctx.method_return`` - The value returned by the specified PyTorch function. The converter must set the ``_trt`` attribute where relevant. -Please see the ``torch2trt.py`` module for more examples. +Please see [this folder](torch2trt/converters) for more examples. From e7e8902779d8231f6deaec1c942530d260f3d642 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 20 Jun 2019 12:18:43 -0700 Subject: [PATCH 095/355] changed test to executable --- test.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) mode change 100644 => 100755 test.sh diff --git a/test.sh b/test.sh old mode 100644 new mode 100755 index 3706bb90..54b3764e --- a/test.sh +++ b/test.sh @@ -1,3 +1,5 @@ +#!/bin/bash + OUTPUT_FILE=$1 touch $OUTPUT_FILE @@ -24,4 +26,4 @@ python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg19$ python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg11_bn python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg13_bn python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg16_bn -python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg19_bn \ No newline at end of file +python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg19_bn From b191521d3aea8df6e62686a1f22801984cc70ce8 Mon Sep 17 00:00:00 2001 From: John Date: Thu, 20 Jun 2019 21:04:56 -0700 Subject: [PATCH 096/355] Update README.md --- README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.md b/README.md index afccf6a6..2c9c5d2e 100644 --- a/README.md +++ b/README.md @@ -144,3 +144,12 @@ the following * ``ctx.method_return`` - The value returned by the specified PyTorch function. The converter must set the ``_trt`` attribute where relevant. Please see [this folder](torch2trt/converters) for more examples. + +## See also + +- [JetBot](http://github.com/NVIDIA-AI-IOT/jetbot) - An educational AI robot based on NVIDIA Jetson Nano + +- [JetCam](http://github.com/NVIDIA-AI-IOT/jetcam) - An easy to use Python camera interface for NVIDIA Jetson +- [JetCard](http://github.com/NVIDIA-AI-IOT/jetcard) - An SD card image for web programming AI projects with NVIDIA Jetson Nano +- [torch2trt](http://github.com/NVIDIA-AI-IOT/torch2trt) - An easy to use PyTorch to TensorRT converter + From f1740290a8e57c2529fb7fae44a1381a985399f3 Mon Sep 17 00:00:00 2001 From: John Date: Thu, 20 Jun 2019 21:05:22 -0700 Subject: [PATCH 097/355] Update README.md --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 2c9c5d2e..9b740f9e 100644 --- a/README.md +++ b/README.md @@ -151,5 +151,3 @@ Please see [this folder](torch2trt/converters) for more examples. 
- [JetCam](http://github.com/NVIDIA-AI-IOT/jetcam) - An easy to use Python camera interface for NVIDIA Jetson - [JetCard](http://github.com/NVIDIA-AI-IOT/jetcard) - An SD card image for web programming AI projects with NVIDIA Jetson Nano -- [torch2trt](http://github.com/NVIDIA-AI-IOT/torch2trt) - An easy to use PyTorch to TensorRT converter - From b28afb915e9b51f6617d66b58442e9671baa7b5c Mon Sep 17 00:00:00 2001 From: John Date: Thu, 20 Jun 2019 21:12:00 -0700 Subject: [PATCH 098/355] Update README.md --- README.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 9b740f9e..99d6b92f 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ If you find an issue, please [let us know](../..//issues)! > Please note, this converter has limited coverage of TensorRT / PyTorch. We created it primarily > to easily optimize the models used in the [JetBot](https://github.com/NVIDIA-AI-IOT/jetbot) project. If you find the converter helpful with other models, please [let us know](../..//issues). -### Setup +## Setup ```bash git clone https://github.com/NVIDIA-AI-IOT/torch2trt @@ -20,11 +20,11 @@ cd torch2trt python setup.py install --user ``` -### Usage +## Usage Below are some usage examples, for more check out the [notebooks](notebooks). -#### Convert +### Convert ```python from torch2trt import torch2trt @@ -40,7 +40,7 @@ x = torch.ones((1, 3, 224, 224)).cuda() model_trt = torch2trt(model, [x]) ``` -#### Execute +### Execute We can execute returned ``TRTModule`` just like the original PyTorch model @@ -52,7 +52,7 @@ y_trt = model_trt(x) print(torch.max(torch.abs(y - y_trt))) ``` -#### Save and load +### Save and load We can save the model as a ``state_dict``. @@ -70,7 +70,7 @@ model_trt = TRTModule() model_trt.load_state_dict(torch.load('alexnet_trt.pth')) ``` -### Models +## Models We tested the converter against these models using the [test.sh](test.sh) script. You can generate the results by calling @@ -104,7 +104,7 @@ bash test.sh TEST_OUTPUT.md | vgg19_bn | | | 51.4 | 121 | -### How does it work? +## How does it work? This converter works by attaching conversion functions (like ``convert_ReLU``) to the original PyTorch functional calls (like ``torch.nn.ReLU.forward``). The sample input data is passed @@ -117,7 +117,7 @@ uses this ``_trt`` to add layers to the TensorRT network, and then sets the ``_t relevant output tensors. Once the model is fully executed, the final tensors returns are marked as outputs of the TensorRT network, and the optimized TensorRT engine is built. -### How to add (or override) a converter +## How to add (or override) a converter Here we show how to add a converter for the ``ReLU`` module using the TensorRT python API. From 759a525175caf811fe8d88f23e237cae1706f67b Mon Sep 17 00:00:00 2001 From: John Date: Thu, 20 Jun 2019 21:14:47 -0700 Subject: [PATCH 099/355] Update README.md --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 99d6b92f..5f8547f8 100644 --- a/README.md +++ b/README.md @@ -17,9 +17,11 @@ If you find an issue, please [let us know](../..//issues)! ```bash git clone https://github.com/NVIDIA-AI-IOT/torch2trt cd torch2trt -python setup.py install --user +sudo python setup.py install ``` +> JetCam is tested against a system configured with the [JetCard](http://github.com/NVIDIA-AI-IOT/jetcard) setup. Different system configurations may require additional steps. 
+ ## Usage Below are some usage examples, for more check out the [notebooks](notebooks). From 5274c98ea443b296ae65a544cc85d29726a2a5ae Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 20 Jun 2019 21:20:30 -0700 Subject: [PATCH 100/355] fixed live demo nb --- notebooks/resnet18/live_demo.ipynb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/notebooks/resnet18/live_demo.ipynb b/notebooks/resnet18/live_demo.ipynb index 08e11431..8f217f1f 100644 --- a/notebooks/resnet18/live_demo.ipynb +++ b/notebooks/resnet18/live_demo.ipynb @@ -38,7 +38,7 @@ "source": [ "from jetcam.utils import bgr8_to_jpeg\n", "import traitlets\n", - "import ipywidgetsb\n", + "import ipywidgets\n", "\n", "image_w = ipywidgets.Image()\n", "\n", @@ -60,6 +60,7 @@ "metadata": {}, "outputs": [], "source": [ + "import torch\n", "from torch2trt import TRTModule\n", "\n", "model_trt = TRTModule()\n", @@ -81,6 +82,7 @@ "source": [ "import cv2\n", "import numpy as np\n", + "import torchvision\n", "\n", "device = torch.device('cuda')\n", "mean = 255.0 * np.array([0.485, 0.456, 0.406])\n", From e4c8559ce1a07620c090308f304052b0b6fd39e1 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 20 Jun 2019 21:21:59 -0700 Subject: [PATCH 101/355] rename nb --- notebooks/{resnet18 => image_classification}/conversion.ipynb | 0 notebooks/{resnet18 => image_classification}/imagenet_labels.json | 0 notebooks/{resnet18 => image_classification}/live_demo.ipynb | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename notebooks/{resnet18 => image_classification}/conversion.ipynb (100%) rename notebooks/{resnet18 => image_classification}/imagenet_labels.json (100%) rename notebooks/{resnet18 => image_classification}/live_demo.ipynb (100%) diff --git a/notebooks/resnet18/conversion.ipynb b/notebooks/image_classification/conversion.ipynb similarity index 100% rename from notebooks/resnet18/conversion.ipynb rename to notebooks/image_classification/conversion.ipynb diff --git a/notebooks/resnet18/imagenet_labels.json b/notebooks/image_classification/imagenet_labels.json similarity index 100% rename from notebooks/resnet18/imagenet_labels.json rename to notebooks/image_classification/imagenet_labels.json diff --git a/notebooks/resnet18/live_demo.ipynb b/notebooks/image_classification/live_demo.ipynb similarity index 100% rename from notebooks/resnet18/live_demo.ipynb rename to notebooks/image_classification/live_demo.ipynb From 86741758f4ee17fbf85bd65645865e3e10d1f066 Mon Sep 17 00:00:00 2001 From: John Date: Thu, 20 Jun 2019 21:23:22 -0700 Subject: [PATCH 102/355] Create LICENSE.md --- LICENSE.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 LICENSE.md diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 00000000..3e950018 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,7 @@ +Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. From 05e2d5a947b31b25036628f393c222b13dc16662 Mon Sep 17 00:00:00 2001 From: John Date: Thu, 20 Jun 2019 21:39:59 -0700 Subject: [PATCH 103/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5f8547f8..cc089234 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,7 @@ model_trt.load_state_dict(torch.load('alexnet_trt.pth')) We tested the converter against these models using the [test.sh](test.sh) script. You can generate the results by calling ```bash -bash test.sh TEST_OUTPUT.md +./test.sh TEST_OUTPUT.md ``` > The results below show the throughput in FPS. You can find the raw output, which includes latency, in the [benchmarks folder](benchmarks). From 7f2b8cb73c61bce94e5b02397c47e56f0ee95563 Mon Sep 17 00:00:00 2001 From: John Date: Thu, 20 Jun 2019 23:57:28 -0700 Subject: [PATCH 104/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index cc089234..e0f04683 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ cd torch2trt sudo python setup.py install ``` -> JetCam is tested against a system configured with the [JetCard](http://github.com/NVIDIA-AI-IOT/jetcard) setup. Different system configurations may require additional steps. +> torch2trt is tested against a system configured with the [JetCard](http://github.com/NVIDIA-AI-IOT/jetcard) setup. Different system configurations may require additional steps. ## Usage From 7832b288ec0e9bc5bd3ead0550ce1f8183d63279 Mon Sep 17 00:00:00 2001 From: John Date: Fri, 21 Jun 2019 13:02:42 -0700 Subject: [PATCH 105/355] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e0f04683..d212862c 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ torch2trt is a PyTorch to TensorRT converter which utilizes the TensorRT Python API. 
The converter is -* Easy to use - Convert models with a single function call ``torch2trt`` +* Easy to use - Convert modules with a single function call ``torch2trt`` * Easy to extend - Write your own layer converter in Python and register it with ``@tensorrt_converter`` From a1e0ab93260bc71b68b4fc8791ddd70969703681 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sun, 23 Jun 2019 18:34:51 -0400 Subject: [PATCH 106/355] added convert_view --- torch2trt/converters/Linear.py | 13 +++++++++++-- torch2trt/converters/__init__.py | 3 ++- torch2trt/converters/identity.py | 3 +-- torch2trt/converters/view.py | 10 ++++++++++ 4 files changed, 24 insertions(+), 5 deletions(-) create mode 100644 torch2trt/converters/view.py diff --git a/torch2trt/converters/Linear.py b/torch2trt/converters/Linear.py index 538498a2..0982e143 100644 --- a/torch2trt/converters/Linear.py +++ b/torch2trt/converters/Linear.py @@ -7,10 +7,19 @@ def convert_Linear(ctx): input = ctx.method_args[1] output = ctx.method_return + # reshape to Nx1x1 + layer = ctx.network.add_shuffle(input._trt) + layer.reshape_dims = (-1, 1, 1) + + # add fully connected layer = ctx.network.add_fully_connected( - input=input._trt, + input=layer.get_output(0), num_outputs=module.out_features, kernel=module.weight.detach().cpu().numpy(), bias=module.bias.detach().cpu().numpy()) - output._trt = layer.get_output(0) \ No newline at end of file + # reshape back to N + layer = ctx.network.add_shuffle(layer.get_output(0)) + layer.reshape_dims = (-1,) + + output._trt = layer.get_output(0) diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index 65951dfe..16b81f21 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -14,4 +14,5 @@ from .relu import * from .ReLU import * from .relu6 import * -from .ReLU6 import * \ No newline at end of file +from .ReLU6 import * +from .view import * diff --git a/torch2trt/converters/identity.py b/torch2trt/converters/identity.py index 5a2de660..2f5f9f05 100644 --- a/torch2trt/converters/identity.py +++ b/torch2trt/converters/identity.py @@ -1,11 +1,10 @@ from torch2trt.torch2trt import * -@tensorrt_converter('torch.Tensor.view') @tensorrt_converter('torch.nn.functional.dropout') @tensorrt_converter('torch.nn.functional.dropout2d') @tensorrt_converter('torch.nn.functional.dropout3d') def convert_identity(ctx): input = ctx.method_args[0] output = ctx.method_return - output._trt = input._trt \ No newline at end of file + output._trt = input._trt diff --git a/torch2trt/converters/view.py b/torch2trt/converters/view.py new file mode 100644 index 00000000..7ee67477 --- /dev/null +++ b/torch2trt/converters/view.py @@ -0,0 +1,10 @@ +from torch2trt.torch2trt import * + + +@tensorrt_converter('torch.Tensor.view') +def convert_view(ctx): + input = ctx.method_args[0] + output = ctx.method_return + layer = ctx.network.add_shuffle(input._trt) + layer.reshape_dims = tuple(output.shape[1:]) + output._trt = layer.get_output(0) From 2e6122eb9457e21fd8f24bc393696c233982f482 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sun, 23 Jun 2019 19:35:52 -0400 Subject: [PATCH 107/355] added add_module_test --- torch2trt/module_test.py | 46 +++++++++++ torch2trt/test.py | 166 ++++++++++++++++----------------------- 2 files changed, 112 insertions(+), 100 deletions(-) create mode 100644 torch2trt/module_test.py diff --git a/torch2trt/module_test.py b/torch2trt/module_test.py new file mode 100644 index 00000000..60ce921d --- /dev/null +++ b/torch2trt/module_test.py @@ -0,0 +1,46 @@ +import 
torch +import torchvision + + +class ModuleTest(object): + def __init__(self, module_fn, dtype, device, input_shapes, **torch2trt_kwargs): + self.module_fn = module_fn + self.dtype = dtype + self.device = device + self.input_shapes = input_shapes + self.torch2trt_kwargs = torch2trt_kwargs + + def module_name(self): + return self.module_fn.__module__ + '.' + self.module_fn.__name__ + + +MODULE_TESTS = [ + ModuleTest(torchvision.models.alexnet, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.squeezenet1_0, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.squeezenet1_1, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.resnet18, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.resnet34, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.resnet50, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.resnet101, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.resnet152, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.densenet121, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.densenet169, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.densenet201, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.densenet161, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.vgg11, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.vgg13, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.vgg16, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.vgg19, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.vgg11_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.vgg13_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.vgg16_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.vgg19_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), +] + + +def add_module_test(dtype, device, input_shapes, **torch2trt_kwargs): + def register_module_test(module): + global MODULE_TESTS + MODULE_TESTS += [ModuleTest(module, dtype, device, input_shapes, **torch2trt_kwargs)] + return module + return register_module_test diff --git a/torch2trt/test.py b/torch2trt/test.py index 9f52ad08..3fc88813 100644 --- a/torch2trt/test.py +++ b/torch2trt/test.py @@ -1,117 +1,83 @@ from torch2trt import * +from .module_test import ModuleTest, MODULE_TESTS import torchvision import time import argparse import re -class ModuleTest(object): - def __init__(self, module_fn, dtype, device, input_shapes, **torch2trt_kwargs): - self.module_fn = module_fn - self.dtype = dtype - self.device = device - self.input_shapes = input_shapes - self.torch2trt_kwargs = torch2trt_kwargs - - 
def module_name(self): - return self.module_fn.__module__ + '.' + self.module_fn.__name__ +def run(self): + # create module + module = self.module_fn() + module = module.to(self.device) + module = module.type(self.dtype) + module = module.eval() - def run(self): - # create module - module = self.module_fn() - module = module.to(self.device) - module = module.type(self.dtype) - module = module.eval() - - # create inputs - inputs = () - for shape in self.input_shapes: - inputs += (torch.ones(shape).to(self.device).type(self.dtype), ) + # create inputs + inputs = () + for shape in self.input_shapes: + inputs += (torch.ones(shape).to(self.device).type(self.dtype), ) - # convert module - module_trt = torch2trt(module, inputs, **self.torch2trt_kwargs) + # convert module + module_trt = torch2trt(module, inputs, **self.torch2trt_kwargs) - # test output against original - outputs = module(*inputs) - outputs_trt = module_trt(*inputs) + # test output against original + outputs = module(*inputs) + outputs_trt = module_trt(*inputs) - if not isinstance(outputs, tuple): - outputs = (outputs, ) + if not isinstance(outputs, tuple): + outputs = (outputs, ) - # compute max error - max_error = 0 - for i in range(len(outputs)): - max_error_i = torch.max(torch.abs(outputs[i] - outputs_trt[i])) - if max_error_i > max_error: - max_error = max_error_i - - # benchmark pytorch throughput - torch.cuda.current_stream().synchronize() - t0 = time.time() - for i in range(50): - outputs = module(*inputs) - torch.cuda.current_stream().synchronize() - t1 = time.time() - - fps = 50.0 / (t1 - t0) - - # benchmark tensorrt throughput - torch.cuda.current_stream().synchronize() - t0 = time.time() - for i in range(50): - outputs = module_trt(*inputs) - torch.cuda.current_stream().synchronize() - t1 = time.time() - - fps_trt = 50.0 / (t1 - t0) - - # benchmark pytorch latency + # compute max error + max_error = 0 + for i in range(len(outputs)): + max_error_i = torch.max(torch.abs(outputs[i] - outputs_trt[i])) + if max_error_i > max_error: + max_error = max_error_i + + # benchmark pytorch throughput + torch.cuda.current_stream().synchronize() + t0 = time.time() + for i in range(50): + outputs = module(*inputs) + torch.cuda.current_stream().synchronize() + t1 = time.time() + + fps = 50.0 / (t1 - t0) + + # benchmark tensorrt throughput + torch.cuda.current_stream().synchronize() + t0 = time.time() + for i in range(50): + outputs = module_trt(*inputs) + torch.cuda.current_stream().synchronize() + t1 = time.time() + + fps_trt = 50.0 / (t1 - t0) + + # benchmark pytorch latency + torch.cuda.current_stream().synchronize() + t0 = time.time() + for i in range(50): + outputs = module(*inputs) torch.cuda.current_stream().synchronize() - t0 = time.time() - for i in range(50): - outputs = module(*inputs) - torch.cuda.current_stream().synchronize() - t1 = time.time() - - ms = 1000.0 * (t1 - t0) / 50.0 - - # benchmark tensorrt latency + t1 = time.time() + + ms = 1000.0 * (t1 - t0) / 50.0 + + # benchmark tensorrt latency + torch.cuda.current_stream().synchronize() + t0 = time.time() + for i in range(50): + outputs = module_trt(*inputs) torch.cuda.current_stream().synchronize() - t0 = time.time() - for i in range(50): - outputs = module_trt(*inputs) - torch.cuda.current_stream().synchronize() - t1 = time.time() + t1 = time.time() + + ms_trt = 1000.0 * (t1 - t0) / 50.0 + + return max_error, fps, fps_trt, ms, ms_trt - ms_trt = 1000.0 * (t1 - t0) / 50.0 - return max_error, fps, fps_trt, ms, ms_trt - - -MODULE_TESTS = [ - 
ModuleTest(torchvision.models.alexnet, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.squeezenet1_0, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.squeezenet1_1, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.resnet18, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.resnet34, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.resnet50, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.resnet101, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.resnet152, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.densenet121, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.densenet169, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.densenet201, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.densenet161, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.vgg11, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.vgg13, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.vgg16, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.vgg19, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.vgg11_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.vgg13_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.vgg16_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.vgg19_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), -] - - if __name__ == '__main__': parser = argparse.ArgumentParser() @@ -127,10 +93,10 @@ def run(self): continue # run test - max_error, fps, fps_trt, ms, ms_trt = test.run() + max_error, fps, fps_trt, ms, ms_trt = run(test) # write entry line = '| %s | %s | %s | %s | %.2E | %.3g | %.3g | %.3g | %.3g |' % (name, test.dtype.__repr__().split('.')[-1], str(test.input_shapes), str(test.torch2trt_kwargs), max_error, fps, fps_trt, ms, ms_trt) print(line) with open(args.output, 'a+') as f: - f.write(line + '\n') \ No newline at end of file + f.write(line + '\n') From ed9e40f34cfa35b4c697e2b949cede63a8a83a82 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sun, 23 Jun 2019 19:46:21 -0400 Subject: [PATCH 108/355] added Conv2d unit tests --- torch2trt/converters/Conv2d.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/torch2trt/converters/Conv2d.py b/torch2trt/converters/Conv2d.py index 874ab98a..160758dc 100644 --- a/torch2trt/converters/Conv2d.py +++ b/torch2trt/converters/Conv2d.py @@ -1,4 +1,5 @@ from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test @tensorrt_converter('torch.nn.Conv2d.forward') @@ -19,6 +20,10 @@ def 
convert_Conv2d(ctx): if not isinstance(padding, tuple): padding = (padding, ) * 2 + dilation = module.dilation + if not isinstance(dilation, tuple): + dilation = (dilation, ) * 2 + kernel = module.weight.detach().cpu().numpy() bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype)) @@ -33,8 +38,30 @@ def convert_Conv2d(ctx): bias=bias) layer.stride = stride layer.padding = padding + layer.dilation = dilation if module.groups is not None: layer.num_groups = module.groups - output._trt = layer.get_output(0) \ No newline at end of file + output._trt = layer.get_output(0) + + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)]) +def test_Conv2d_basic(): + return torch.nn.Conv2d(10, 5, kernel_size=1, stride=1, padding=0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)]) +def test_Conv2d_stride2(): + return torch.nn.Conv2d(10, 5, kernel_size=1, stride=2, padding=0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)]) +def test_Conv2d_kernel3(): + return torch.nn.Conv2d(10, 5, kernel_size=3, stride=2, padding=1) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)]) +def test_Conv2d_dilation2(): + return torch.nn.Conv2d(10, 5, kernel_size=3, stride=1, padding=1, dilation=2) From 7ca5d30709073e8fa98c5b7429a26b00ec06af49 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sun, 23 Jun 2019 18:34:51 -0400 Subject: [PATCH 109/355] added convert_view --- torch2trt/converters/Linear.py | 13 +++++++++++-- torch2trt/converters/__init__.py | 3 ++- torch2trt/converters/identity.py | 3 +-- torch2trt/converters/view.py | 10 ++++++++++ 4 files changed, 24 insertions(+), 5 deletions(-) create mode 100644 torch2trt/converters/view.py diff --git a/torch2trt/converters/Linear.py b/torch2trt/converters/Linear.py index 538498a2..0982e143 100644 --- a/torch2trt/converters/Linear.py +++ b/torch2trt/converters/Linear.py @@ -7,10 +7,19 @@ def convert_Linear(ctx): input = ctx.method_args[1] output = ctx.method_return + # reshape to Nx1x1 + layer = ctx.network.add_shuffle(input._trt) + layer.reshape_dims = (-1, 1, 1) + + # add fully connected layer = ctx.network.add_fully_connected( - input=input._trt, + input=layer.get_output(0), num_outputs=module.out_features, kernel=module.weight.detach().cpu().numpy(), bias=module.bias.detach().cpu().numpy()) - output._trt = layer.get_output(0) \ No newline at end of file + # reshape back to N + layer = ctx.network.add_shuffle(layer.get_output(0)) + layer.reshape_dims = (-1,) + + output._trt = layer.get_output(0) diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index 65951dfe..16b81f21 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -14,4 +14,5 @@ from .relu import * from .ReLU import * from .relu6 import * -from .ReLU6 import * \ No newline at end of file +from .ReLU6 import * +from .view import * diff --git a/torch2trt/converters/identity.py b/torch2trt/converters/identity.py index 5a2de660..2f5f9f05 100644 --- a/torch2trt/converters/identity.py +++ b/torch2trt/converters/identity.py @@ -1,11 +1,10 @@ from torch2trt.torch2trt import * -@tensorrt_converter('torch.Tensor.view') @tensorrt_converter('torch.nn.functional.dropout') @tensorrt_converter('torch.nn.functional.dropout2d') @tensorrt_converter('torch.nn.functional.dropout3d') def convert_identity(ctx): input = ctx.method_args[0] output = ctx.method_return - output._trt = input._trt \ No newline at end of file + output._trt = input._trt diff --git 
a/torch2trt/converters/view.py b/torch2trt/converters/view.py new file mode 100644 index 00000000..7ee67477 --- /dev/null +++ b/torch2trt/converters/view.py @@ -0,0 +1,10 @@ +from torch2trt.torch2trt import * + + +@tensorrt_converter('torch.Tensor.view') +def convert_view(ctx): + input = ctx.method_args[0] + output = ctx.method_return + layer = ctx.network.add_shuffle(input._trt) + layer.reshape_dims = tuple(output.shape[1:]) + output._trt = layer.get_output(0) From 2b5332d85e9ea913524686d191a38897990ea660 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sun, 23 Jun 2019 20:07:16 -0400 Subject: [PATCH 110/355] added view unit test --- torch2trt/converters/view.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/torch2trt/converters/view.py b/torch2trt/converters/view.py index 7ee67477..b9341327 100644 --- a/torch2trt/converters/view.py +++ b/torch2trt/converters/view.py @@ -1,4 +1,5 @@ from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test @tensorrt_converter('torch.Tensor.view') @@ -8,3 +9,34 @@ def convert_view(ctx): layer = ctx.network.add_shuffle(input._trt) layer.reshape_dims = tuple(output.shape[1:]) output._trt = layer.get_output(0) + + +class View(torch.nn.Module): + def __init__(self, *dims): + super(View, self).__init__() + self.dims = dims + + def forward(self, x): + return x.view(*self.dims) + + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +def test_view_1d(): + return View(1, -1) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +def test_view_2d(): + return View(1, 1, -1) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +def test_view_3d(): + return View(1, 1, 1, -1) From dc82d98803c0a8e1c536abb75b1380ce43a46710 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sun, 23 Jun 2019 20:09:44 -0400 Subject: [PATCH 111/355] added linear converter --- torch2trt/converters/Linear.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/torch2trt/converters/Linear.py b/torch2trt/converters/Linear.py index 0982e143..5d495f5e 100644 --- a/torch2trt/converters/Linear.py +++ b/torch2trt/converters/Linear.py @@ -1,4 +1,5 @@ from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test @tensorrt_converter('torch.nn.Linear.forward') @@ -23,3 +24,8 @@ def convert_Linear(ctx): layer.reshape_dims = (-1,) output._trt = layer.get_output(0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10)]) +def test_Linear_basic(): + return torch.nn.Linear(10, 5) From 4d894ca960bd70e5fc866974a839638be8d79eeb Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sun, 23 Jun 2019 21:12:50 -0400 Subject: [PATCH 112/355] switched test to use randn inputs --- torch2trt/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch2trt/test.py b/torch2trt/test.py index 3fc88813..f1265947 100644 --- a/torch2trt/test.py +++ b/torch2trt/test.py @@ -16,7 +16,7 @@ def run(self): # create inputs inputs = () for shape in self.input_shapes: - inputs += (torch.ones(shape).to(self.device).type(self.dtype), ) + inputs += 
(torch.randn(shape).to(self.device).type(self.dtype), ) # convert module module_trt = torch2trt(module, inputs, **self.torch2trt_kwargs) From e902f7aa2368dab98192fcf461e450ae6a238513 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sun, 23 Jun 2019 21:13:07 -0400 Subject: [PATCH 113/355] added AdaptiveAvgPool2d unit tests --- torch2trt/converters/AdaptiveAvgPool2d.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/torch2trt/converters/AdaptiveAvgPool2d.py b/torch2trt/converters/AdaptiveAvgPool2d.py index 93a13f5d..ea865156 100644 --- a/torch2trt/converters/AdaptiveAvgPool2d.py +++ b/torch2trt/converters/AdaptiveAvgPool2d.py @@ -1,4 +1,5 @@ from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test @tensorrt_converter('torch.nn.AdaptiveAvgPool2d.forward') @@ -18,4 +19,19 @@ def convert_AdaptiveAvgPool2d(ctx): input=input._trt, type=trt.PoolingType.AVERAGE, window_size=kernel_size) layer.stride = stride - output._trt = layer.get_output(0) \ No newline at end of file + output._trt = layer.get_output(0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) +def test_AdaptiveAvgPool2d_1x1(): + return torch.nn.AdaptiveAvgPool2d((1, 1)) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) +def test_AdaptiveAvgPool2d_2x2(): + return torch.nn.AdaptiveAvgPool2d((2, 2)) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) +def test_AdaptiveAvgPool2d_3x3(): + return torch.nn.AdaptiveAvgPool2d((3, 3)) From 372efba6c25e4fa6f3e8482b6189088fa1ebeb06 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sun, 23 Jun 2019 22:46:25 -0400 Subject: [PATCH 114/355] added copy to handle inplace ops --- torch2trt/test.py | 7 +++++-- torch2trt/torch2trt.py | 6 +++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/torch2trt/test.py b/torch2trt/test.py index f1265947..b0915d75 100644 --- a/torch2trt/test.py +++ b/torch2trt/test.py @@ -18,16 +18,19 @@ def run(self): for shape in self.input_shapes: inputs += (torch.randn(shape).to(self.device).type(self.dtype), ) + # create copy of inputs to handle inplace ops + inputs_trt = tuple([tensor.clone() for tensor in inputs]) + # convert module module_trt = torch2trt(module, inputs, **self.torch2trt_kwargs) # test output against original outputs = module(*inputs) - outputs_trt = module_trt(*inputs) + outputs_trt = module_trt(*inputs_trt) if not isinstance(outputs, tuple): outputs = (outputs, ) - + # compute max error max_error = 0 for i in range(len(outputs)): diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index 0e9ca223..ede57d01 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -225,6 +225,10 @@ def forward(self, *inputs): def torch2trt(module, inputs, input_names=None, output_names=None, log_level=trt.Logger.ERROR, max_batch_size=1, fp16_mode=False, max_workspace_size=0): + + # copy inputs to avoid modifications to source data + inputs = [tensor.clone() for tensor in inputs] + with trt.Logger(log_level) as logger, trt.Builder(logger) as builder,\ builder.create_network() as network, ConversionContext(network) as ctx: @@ -258,4 +262,4 @@ def tensorrt_converter(method): def register_converter(converter): CONVERTERS[method] = converter return converter - return register_converter \ No newline at end of file + return register_converter From fb37e088ceda87bfe44e4b722c8f5bf75d705294 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sun, 23 Jun 2019 22:47:04 -0400 Subject: [PATCH 115/355] added test 
cases for add --- torch2trt/converters/add.py | 59 +++++++++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/torch2trt/converters/add.py b/torch2trt/converters/add.py index 801e26c8..ab4390ef 100644 --- a/torch2trt/converters/add.py +++ b/torch2trt/converters/add.py @@ -1,11 +1,66 @@ from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test -@tensorrt_converter('torch.Tensor.__iadd__') @tensorrt_converter('torch.Tensor.__add__') def convert_add(ctx): input_a = ctx.method_args[0] input_b = ctx.method_args[1] output = ctx.method_return layer = ctx.network.add_elementwise(input_a._trt, input_b._trt, trt.ElementWiseOperation.SUM) - output._trt = layer.get_output(0) \ No newline at end of file + output._trt = layer.get_output(0) + + +@tensorrt_converter('torch.Tensor.__iadd__') +def convert_iadd(ctx): + input_a = ctx.method_args[0] + input_b = ctx.method_args[1] + layer = ctx.network.add_elementwise(input_a._trt, input_b._trt, trt.ElementWiseOperation.SUM) + ctx.method_args[0]._trt = layer.get_output(0) + + +# TEST z = x + y + +class Add(torch.nn.Module): + def __init__(self): + super(Add, self).__init__() + + def forward(self, x, y): + return x + y + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) +def test_add_basic(): + return Add() + + +# TEST x += y + +class IAdd(torch.nn.Module): + def __init__(self): + super(IAdd, self).__init__() + + def forward(self, x, y): + x += y + return x + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) +def test_iadd_basic(): + return IAdd() + + +# TEST y = x + 1 + + +class AddScalar(torch.nn.Module): + def __init__(self): + super(AddScalar, self).__init__() + + def forward(self, x): + x = x + 1 + return x + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) +def test_add_scalar(): + return AddScalar() From 7fd680c2f805211f2b8974a32eb167ec2868000a Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sun, 23 Jun 2019 22:51:02 -0400 Subject: [PATCH 116/355] moved iadd to separate file --- torch2trt/converters/__init__.py | 1 + torch2trt/converters/add.py | 43 -------------------------------- torch2trt/converters/iadd.py | 24 ++++++++++++++++++ 3 files changed, 25 insertions(+), 43 deletions(-) create mode 100644 torch2trt/converters/iadd.py diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index 16b81f21..a9e91b15 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -1,6 +1,7 @@ from .adaptive_avg_pool2d import * from .AdaptiveAvgPool2d import * from .add import * +from .iadd import * from .AvgPool2d import * from .BatchNorm2d import * from .cat import * diff --git a/torch2trt/converters/add.py b/torch2trt/converters/add.py index ab4390ef..7483766b 100644 --- a/torch2trt/converters/add.py +++ b/torch2trt/converters/add.py @@ -11,16 +11,6 @@ def convert_add(ctx): output._trt = layer.get_output(0) -@tensorrt_converter('torch.Tensor.__iadd__') -def convert_iadd(ctx): - input_a = ctx.method_args[0] - input_b = ctx.method_args[1] - layer = ctx.network.add_elementwise(input_a._trt, input_b._trt, trt.ElementWiseOperation.SUM) - ctx.method_args[0]._trt = layer.get_output(0) - - -# TEST z = x + y - class Add(torch.nn.Module): def __init__(self): super(Add, self).__init__() @@ -31,36 +21,3 @@ def forward(self, x, y): @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) def test_add_basic(): 
return Add() - - -# TEST x += y - -class IAdd(torch.nn.Module): - def __init__(self): - super(IAdd, self).__init__() - - def forward(self, x, y): - x += y - return x - - -@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) -def test_iadd_basic(): - return IAdd() - - -# TEST y = x + 1 - - -class AddScalar(torch.nn.Module): - def __init__(self): - super(AddScalar, self).__init__() - - def forward(self, x): - x = x + 1 - return x - - -@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) -def test_add_scalar(): - return AddScalar() diff --git a/torch2trt/converters/iadd.py b/torch2trt/converters/iadd.py new file mode 100644 index 00000000..7c513b6f --- /dev/null +++ b/torch2trt/converters/iadd.py @@ -0,0 +1,24 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +@tensorrt_converter('torch.Tensor.__iadd__') +def convert_iadd(ctx): + input_a = ctx.method_args[0] + input_b = ctx.method_args[1] + layer = ctx.network.add_elementwise(input_a._trt, input_b._trt, trt.ElementWiseOperation.SUM) + ctx.method_args[0]._trt = layer.get_output(0) + + +class IAdd(torch.nn.Module): + def __init__(self): + super(IAdd, self).__init__() + + def forward(self, x, y): + x += y + return x + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) +def test_iadd_basic(): + return IAdd() From e16260e00451ca4f3f56633ee06e1ff0ca343de3 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Mon, 24 Jun 2019 01:37:40 -0400 Subject: [PATCH 117/355] added warning color highlight to test print --- torch2trt/test.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/torch2trt/test.py b/torch2trt/test.py index b0915d75..890e9261 100644 --- a/torch2trt/test.py +++ b/torch2trt/test.py @@ -1,9 +1,9 @@ from torch2trt import * from .module_test import ModuleTest, MODULE_TESTS -import torchvision import time import argparse import re +from termcolor import colored def run(self): @@ -86,6 +86,7 @@ def run(self): parser = argparse.ArgumentParser() parser.add_argument('--output', '-o', help='Test output file path', type=str, default='torch2trt_test.md') parser.add_argument('--name', help='Regular expression to filter modules to test by name', type=str, default='.*') + parser.add_argument('--tolerance', help='Maximum error to print warning for entry', type=float, default='-1') args = parser.parse_args() for test in MODULE_TESTS: @@ -100,6 +101,11 @@ def run(self): # write entry line = '| %s | %s | %s | %s | %.2E | %.3g | %.3g | %.3g | %.3g |' % (name, test.dtype.__repr__().split('.')[-1], str(test.input_shapes), str(test.torch2trt_kwargs), max_error, fps, fps_trt, ms, ms_trt) - print(line) + + if args.tolerance >= 0 and max_error > args.tolerance: + print(colored(line, 'yellow')) + else: + print(line) + with open(args.output, 'a+') as f: f.write(line + '\n') From b98859787b4bdde36fa5222a7d5bd8a9fda2d35e Mon Sep 17 00:00:00 2001 From: John Welsh Date: Mon, 24 Jun 2019 02:05:16 -0400 Subject: [PATCH 118/355] added reshape conversion to view --- torch2trt/converters/view.py | 1 + 1 file changed, 1 insertion(+) diff --git a/torch2trt/converters/view.py b/torch2trt/converters/view.py index b9341327..6920c39f 100644 --- a/torch2trt/converters/view.py +++ b/torch2trt/converters/view.py @@ -2,6 +2,7 @@ from torch2trt.module_test import add_module_test +@tensorrt_converter('torch.Tensor.reshape') @tensorrt_converter('torch.Tensor.view') def convert_view(ctx): input = ctx.method_args[0] From 
ac22f26898a83c633ce519f540dba2d7646a5286 Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Mon, 24 Jun 2019 21:38:23 -0400
Subject: [PATCH 119/355] interpolate

---
 torch2trt/converters/interpolate/build.ninja | 18 ++++++++++++++++++
 .../converters/interpolate/interpolate.cpp   | 2 ++
 torch2trt/converters/interpolate/interpolate.h | 0
 .../converters/interpolate/interpolate.proto | 9 +++++++++
 .../converters/interpolate/interpolate.py | 0
 5 files changed, 29 insertions(+)
 create mode 100644 torch2trt/converters/interpolate/build.ninja
 create mode 100644 torch2trt/converters/interpolate/interpolate.cpp
 create mode 100644 torch2trt/converters/interpolate/interpolate.h
 create mode 100644 torch2trt/converters/interpolate/interpolate.proto
 create mode 100644 torch2trt/converters/interpolate/interpolate.py

diff --git a/torch2trt/converters/interpolate/build.ninja b/torch2trt/converters/interpolate/build.ninja
new file mode 100644
index 00000000..8cc1173b
--- /dev/null
+++ b/torch2trt/converters/interpolate/build.ninja
@@ -0,0 +1,18 @@
+#cflags = -c -fpic
+
+plugin = interpolate
+dir = .
+
+rule protoc
+  command = protoc $in --cpp_out=$dir --python_out=$dir $cflags
+
+rule cxx
+  command = g++ -c $in -fpic -o $out -lprotobuf
+
+rule link
+  command = g++ -shared -o $out $in
+
+build $plugin.pb.h $plugin.pb.cc ${plugin}_pb2.py: protoc $plugin.proto
+build $plugin.pb.o: cxx $plugin.pb.cc
+build $plugin.o: cxx $plugin.cpp
+build $plugin.so: link $plugin.o $plugin.pb.o
diff --git a/torch2trt/converters/interpolate/interpolate.cpp b/torch2trt/converters/interpolate/interpolate.cpp
new file mode 100644
index 00000000..aeb9c8a2
--- /dev/null
+++ b/torch2trt/converters/interpolate/interpolate.cpp
@@ -0,0 +1,2 @@
+#include
+#include "interpolate.pb.h"
diff --git a/torch2trt/converters/interpolate/interpolate.h b/torch2trt/converters/interpolate/interpolate.h
new file mode 100644
index 00000000..e69de29b
diff --git a/torch2trt/converters/interpolate/interpolate.proto b/torch2trt/converters/interpolate/interpolate.proto
new file mode 100644
index 00000000..cea57185
--- /dev/null
+++ b/torch2trt/converters/interpolate/interpolate.proto
@@ -0,0 +1,9 @@
+syntax = "proto3";
+
+
+message message_interpolate {
+  repeated int64 size = 1;
+  repeated double scale_factor = 2;
+  string mode = 3;
+  bool align_corners = 4;
+}
diff --git a/torch2trt/converters/interpolate/interpolate.py b/torch2trt/converters/interpolate/interpolate.py
new file mode 100644
index 00000000..e69de29b
From 86dd46a52d46326026f80268cbb7cc0a6de1dd8b Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Mon, 24 Jun 2019 23:18:55 -0400
Subject: [PATCH 120/355] implemented interpolate plugin (IPluginV2)

---
 torch2trt/converters/interpolate/build.ninja |   5 +-
 .../converters/interpolate/interpolate.cpp   | 123 +++++++++++++++++-
 .../converters/interpolate/interpolate.h     |   0
 .../converters/interpolate/interpolate.proto |   7 +-
 4 files changed, 128 insertions(+), 7 deletions(-)
 delete mode 100644 torch2trt/converters/interpolate/interpolate.h

diff --git a/torch2trt/converters/interpolate/build.ninja b/torch2trt/converters/interpolate/build.ninja
index 8cc1173b..335c1437 100644
--- a/torch2trt/converters/interpolate/build.ninja
+++ b/torch2trt/converters/interpolate/build.ninja
@@ -2,15 +2,16 @@
 
 plugin = interpolate
 dir = .
+torch_dir = /usr/local/lib/python3.6/dist-packages/torch rule protoc command = protoc $in --cpp_out=$dir --python_out=$dir $cflags rule cxx - command = g++ -c $in -fpic -o $out -lprotobuf + command = g++ -c $in -fpic -o $out -I $torch_dir/include -I $torch_dir/include/torch/csrc/api/include rule link - command = g++ -shared -o $out $in + command = g++ -shared -o $out $in -lprotobuf -lc10 -lc10_cuda -ltorch -L $torch_dir/lib build $plugin.pb.h $plugin.pb.cc ${plugin}_pb2.py: protoc $plugin.proto build $plugin.pb.o: cxx $plugin.pb.cc diff --git a/torch2trt/converters/interpolate/interpolate.cpp b/torch2trt/converters/interpolate/interpolate.cpp index aeb9c8a2..76b5faa8 100644 --- a/torch2trt/converters/interpolate/interpolate.cpp +++ b/torch2trt/converters/interpolate/interpolate.cpp @@ -1,2 +1,123 @@ -#include +#include +#include +#include #include "interpolate.pb.h" + + +using namespace nvinfer1; + + +namespace torch2trt +{ + +class interpolate_plugin : public IPluginV2 { +private: + interpolate_message message; + at::TensorOptions tensor_options; + std::vector input_sizes; + std::vector output_sizes; + +public: + interpolate_plugin(interpolate_message message) : message(message) {} + + const char* getPluginType() const override { + return "interpolate"; + }; + + const char* getPluginVersion() const override { + return "1"; + } + + int getNbOutputs() const override { + return 1; + } + + Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override { + Dims dims; + dims.nbDims = inputs->nbDims; + + for (int i = 0; i < message.size_size(); i++) { + dims.d[i] = message.size(i); + } + + return dims; + } + + bool supportsFormat(DataType type, PluginFormat format) const override { + if (format != PluginFormat::kNCHW) { + return false; + } + if (type == DataType::kINT32 || type == DataType::kINT8) { + return false; + } + return true; + } + + void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, + int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) override { + tensor_options = tensor_options.device(c10::kCUDA); + if (type == DataType::kFLOAT) { + tensor_options = tensor_options.dtype(c10::kFloat); + } else if (type == DataType::kHALF) { + tensor_options = tensor_options.dtype(c10::kHalf); + } + + for (int i = 0; i < inputDims[0].nbDims; i++) { + input_sizes.push_back(inputDims[0].d[i]); + } + + for (int i = 0; i < outputDims[0].nbDims; i++) { + output_sizes.push_back(outputDims[0].d[i]); + } + } + + int initialize() override { + return 0; + } + + void terminate() override {} + + size_t getWorkspaceSize(int maxBatchSize) const override { return 0; } + + int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream) override { + // get input / output dimensions + std::vector batch_input_sizes = input_sizes; + std::vector batch_output_sizes = output_sizes; + batch_input_sizes.insert(batch_input_sizes.begin(), batchSize); + batch_output_sizes.insert(batch_output_sizes.begin(), batchSize); + + // create tensor wrappers + at::Tensor input = at::from_blob((void*) inputs[0], input_sizes, [](void*){}, tensor_options); + at::Tensor output = at::from_blob(outputs[0], input_sizes, [](void*){}, tensor_options); + + // execute + if (message.mode() == "nearest") { + at::upsample_bilinear2d_out(output, input, {output_sizes[2], output_sizes[3]}, message.align_corners()); + } + + return 0; + } + + size_t getSerializationSize() const override { + return message.SerializeAsString().size(); + } + + void 
serialize(void* buffer) const override {
+    message.SerializeToArray(buffer, getSerializationSize());
+  }
+
+  void destroy() override {}
+
+  IPluginV2* clone() const override {
+    return new interpolate_plugin(message);
+  }
+
+  void setPluginNamespace(const char* pluginNamespace) override {}
+
+  const char *getPluginNamespace() const override {
+    return "torch2trt";
+  }
+
+};
+
+}
diff --git a/torch2trt/converters/interpolate/interpolate.h b/torch2trt/converters/interpolate/interpolate.h
deleted file mode 100644
index e69de29b..00000000
diff --git a/torch2trt/converters/interpolate/interpolate.proto b/torch2trt/converters/interpolate/interpolate.proto
index cea57185..7319f650 100644
--- a/torch2trt/converters/interpolate/interpolate.proto
+++ b/torch2trt/converters/interpolate/interpolate.proto
@@ -1,9 +1,8 @@
 syntax = "proto3";
 
 
-message message_interpolate {
+message interpolate_message {
   repeated int64 size = 1;
-  repeated double scale_factor = 2;
-  string mode = 3;
-  bool align_corners = 4;
+  string mode = 2;
+  bool align_corners = 3;
 }
From 4a6f698da73bd30af91ca864a646e4a4d90ca3a6 Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Tue, 25 Jun 2019 15:01:36 -0400
Subject: [PATCH 121/355] added stream synchronization and plugin creator to
 interpolate

---
 torch2trt/converters/interpolate/build.ninja |  37 ++++++++--
 .../converters/interpolate/interpolate.cpp   |  67 ++++++++++++++++++-
 2 files changed, 95 insertions(+), 9 deletions(-)

diff --git a/torch2trt/converters/interpolate/build.ninja b/torch2trt/converters/interpolate/build.ninja
index 335c1437..625bdb9f 100644
--- a/torch2trt/converters/interpolate/build.ninja
+++ b/torch2trt/converters/interpolate/build.ninja
@@ -3,17 +3,40 @@
 plugin = interpolate
 dir = .
 torch_dir = /usr/local/lib/python3.6/dist-packages/torch
+cuda_dir = /usr/local/cuda
 
 rule protoc
   command = protoc $in --cpp_out=$dir --python_out=$dir $cflags
 
 rule cxx
-  command = g++ -c $in -fpic -o $out -I $torch_dir/include -I $torch_dir/include/torch/csrc/api/include
-
-rule link
-  command = g++ -shared -o $out $in -lprotobuf -lc10 -lc10_cuda -ltorch -L $torch_dir/lib
+  command = g++ $in -o $out -I$cuda_dir/include -I$torch_dir/include -I$torch_dir/include/torch/csrc/api/include -L$torch_dir/lib -L$cuda_dir/lib64 -lc10 -lc10_cuda -ltorch -lcudart -lcaffe2 -lcaffe2_gpu -D_GLIBCXX_USE_CXX11_ABI=0 -lprotobuf -lprotobuf-lite -pthread -lpthread
 
 build $plugin.pb.h $plugin.pb.cc ${plugin}_pb2.py: protoc $plugin.proto
-build $plugin.pb.o: cxx $plugin.pb.cc
-build $plugin.o: cxx $plugin.cpp
-build $plugin.so: link $plugin.o $plugin.pb.o
+build $plugin: cxx $plugin.pb.cc $plugin.cpp
+
+#
+#rule cxx
+# command = g++ $in -o $out -I $torch_dir/include -I $torch_dir/include/torch/csrc/api/include -I $cuda_dir/include -lprotobuf -lc10 -lc10_cuda -ltorch -pthread -lpthread -L $torch_dir/lib -L$cuda_dir/lib64 -lcudart -lcaffe2 -lcaffe2_gpu -D_GLIBCXX_USE_CXX11_ABI=0 -lnvinfer -lprotobuf-lite -lprotoc
+#
+#rule cxx_plugin
+# command = g++ -c -fPIC $in -I $torch_dir/include -I $torch_dir/include/torch/csrc/api/include -I $cuda_dir/include -D_GLIBCXX_USE_CXX11_ABI=0
+#
+#rule cxx_proto
+# command = g++ -c -fPIC $in
+#
+#rule link
+# command = g++ -shared -o $out $in -pthread -lpthread -lprotobuf -lprotobuf-lite -lnvinfer -lc10 -lc10_cuda -ltorch -L $torch_dir/lib -L $cuda_dir/lib64
+#
+#rule exe
+# command = g++ $in -o $out -L. 
-l$plugin#-I $torch_dir/include -I $torch_dir/include/torch/csrc/api/include -I $cuda_dir/include -lc10 -lc10_cuda -ltorch -L$torch_dir/lib -lcudart -L $cuda_dir/lib64 -lcaffe2 -lcaffe2_gpu -D_GLIBCXX_USE_CXX11_ABI=0 -lnvinfer +# +#build $plugin.pb.h $plugin.pb.cc ${plugin}_pb2.py: protoc $plugin.proto +#build $plugin.pb.o: cxx_proto $plugin.pb.cc +#build $plugin.o: cxx_plugin $plugin.cpp +#build $plugin.so: link $plugin.o $plugin.pb.o +# +#build $plugin.pb.o: cxx $plugin.pb.cc +#build $plugin.o: cxx $plugin.cpp +#build $plugin.so: cxx $plugin.cpp $plugin.pb.cc + +#build test: exe test.cpp diff --git a/torch2trt/converters/interpolate/interpolate.cpp b/torch2trt/converters/interpolate/interpolate.cpp index 76b5faa8..ab828cc0 100644 --- a/torch2trt/converters/interpolate/interpolate.cpp +++ b/torch2trt/converters/interpolate/interpolate.cpp @@ -1,6 +1,9 @@ +#include #include #include +#include #include +#include #include "interpolate.pb.h" @@ -90,11 +93,33 @@ class interpolate_plugin : public IPluginV2 { at::Tensor input = at::from_blob((void*) inputs[0], input_sizes, [](void*){}, tensor_options); at::Tensor output = at::from_blob(outputs[0], input_sizes, [](void*){}, tensor_options); - // execute - if (message.mode() == "nearest") { + // create new torch cuda stream + at::cuda::CUDAStream torch_stream = at::cuda::getStreamFromPool(); + at::cuda::CUDAStreamGuard torch_guard(torch_stream); + + // capture current work on tensorrt cuda stream + cudaEvent_t event; + cudaEventCreate(&event); + cudaEventRecord(event, stream); + + // make torch cuda stream wait on tensorrt work + cudaStreamWaitEvent(torch_stream.stream(), event, 0); + + // enqueue work + if (message.mode() == "bilinear") { at::upsample_bilinear2d_out(output, input, {output_sizes[2], output_sizes[3]}, message.align_corners()); } + // capture event on enqueued stream + cudaEvent_t torch_event; + cudaEventCreate(&torch_event); + cudaEventRecord(torch_event, torch_stream.stream()); + + cudaStreamWaitEvent(stream, torch_event, 0); + + cudaEventDestroy(event); + cudaEventDestroy(torch_event); + return 0; } @@ -120,4 +145,42 @@ class interpolate_plugin : public IPluginV2 { }; +class interpolate_PluginCreator : public IPluginCreator { +public: + interpolate_PluginCreator() {} + + const char *getPluginNamespace() const override { + return "torch2trt"; + } + + const char *getPluginName() const override { + return "interpolate"; + } + + const char *getPluginVersion() const override { + return "1"; + } + + IPluginV2 *deserializePlugin(const char *name, const void *data, size_t length) override { + interpolate_message message; + message.ParseFromArray(data, length); + return new interpolate_plugin(message); + } + + void setPluginNamespace(const char *N) override {} + const PluginFieldCollection *getFieldNames() override { return nullptr; } + + IPluginV2 *createPlugin(const char *name, const PluginFieldCollection *fc) override { return nullptr; } + +}; + +REGISTER_TENSORRT_PLUGIN(interpolate_PluginCreator); + +} + +int main() { + + interpolate_message m; + std::cout << m.size_size() << std::endl; + return 0; } From 8e9522fadb1dfa9cadf57f3f10fb1ca5d78464c7 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 25 Jun 2019 17:18:43 -0400 Subject: [PATCH 122/355] working interpolate --- torch2trt/converters/interpolate/build.ninja | 33 ++----------------- .../converters/interpolate/interpolate.cpp | 16 +++------ 2 files changed, 7 insertions(+), 42 deletions(-) diff --git a/torch2trt/converters/interpolate/build.ninja 
b/torch2trt/converters/interpolate/build.ninja index 625bdb9f..021b6b53 100644 --- a/torch2trt/converters/interpolate/build.ninja +++ b/torch2trt/converters/interpolate/build.ninja @@ -1,5 +1,3 @@ -#cflags = -c -fpic - plugin = interpolate dir = . torch_dir = /usr/local/lib/python3.6/dist-packages/torch @@ -9,34 +7,7 @@ rule protoc command = protoc $in --cpp_out=$dir --python_out=$dir $cflags rule cxx - command = g++ $in -o $out -I$cuda_dir/include -I$torch_dir/include -I$torch_dir/include/torch/csrc/api/include -L$torch_dir/lib -L$cuda_dir/lib64 -lc10 -lc10_cuda -ltorch -lcudart -lcaffe2 -lcaffe2_gpu -D_GLIBCXX_USE_CXX11_ABI=0 -lprotobuf -lprotobuf-lite -pthread -lpthread + command = g++ -shared -fPIC $in -o $out -I$cuda_dir/include -I$torch_dir/include -I$torch_dir/include/torch/csrc/api/include -L$torch_dir/lib -L$cuda_dir/lib64 -lc10 -lc10_cuda -ltorch -lcudart -lcaffe2 -lcaffe2_gpu -lprotobuf -lprotobuf-lite -pthread -lpthread -lnvinfer build $plugin.pb.h $plugin.pb.cc ${plugin}_pb2.py: protoc $plugin.proto -build $plugin: cxx $plugin.pb.cc $plugin.cpp - -# -#rule cxx -# command = g++ $in -o $out -I $torch_dir/include -I $torch_dir/include/torch/csrc/api/include -I $cuda_dir/include -lprotobuf -lc10 -lc10_cuda -ltorch -pthread -lpthread -L $torch_dir/lib -L$cuda_dir/lib64 -lcudart -lcaffe2 -lcaffe2_gpu -D_GLIBCXX_USE_CXX11_ABI=0 -lnvinfer -lprotobuf-lite -lprotoc -# -#rule cxx_plugin -# command = g++ -c -fPIC $in -I $torch_dir/include -I $torch_dir/include/torch/csrc/api/include -I $cuda_dir/include -D_GLIBCXX_USE_CXX11_ABI=0 -# -#rule cxx_proto -# command = g++ -c -fPIC $in -# -#rule link -# command = g++ -shared -o $out $in -pthread -lpthread -lprotobuf -lprotobuf-lite -lnvinfer -lc10 -lc10_cuda -ltorch -L $torch_dir/lib -L $cuda_dir/lib64 -# -#rule exe -# command = g++ $in -o $out -L. 
-l$plugin#-I $torch_dir/include -I $torch_dir/include/torch/csrc/api/include -I $cuda_dir/include -lc10 -lc10_cuda -ltorch -L$torch_dir/lib -lcudart -L $cuda_dir/lib64 -lcaffe2 -lcaffe2_gpu -D_GLIBCXX_USE_CXX11_ABI=0 -lnvinfer -# -#build $plugin.pb.h $plugin.pb.cc ${plugin}_pb2.py: protoc $plugin.proto -#build $plugin.pb.o: cxx_proto $plugin.pb.cc -#build $plugin.o: cxx_plugin $plugin.cpp -#build $plugin.so: link $plugin.o $plugin.pb.o -# -#build $plugin.pb.o: cxx $plugin.pb.cc -#build $plugin.o: cxx $plugin.cpp -#build $plugin.so: cxx $plugin.cpp $plugin.pb.cc - -#build test: exe test.cpp +build $plugin.so: cxx $plugin.pb.cc $plugin.cpp diff --git a/torch2trt/converters/interpolate/interpolate.cpp b/torch2trt/converters/interpolate/interpolate.cpp index ab828cc0..121b7bc1 100644 --- a/torch2trt/converters/interpolate/interpolate.cpp +++ b/torch2trt/converters/interpolate/interpolate.cpp @@ -39,8 +39,9 @@ class interpolate_plugin : public IPluginV2 { Dims dims; dims.nbDims = inputs->nbDims; + dims.d[0] = inputs->d[0]; for (int i = 0; i < message.size_size(); i++) { - dims.d[i] = message.size(i); + dims.d[i + 1] = message.size(i); } return dims; @@ -90,8 +91,8 @@ class interpolate_plugin : public IPluginV2 { batch_output_sizes.insert(batch_output_sizes.begin(), batchSize); // create tensor wrappers - at::Tensor input = at::from_blob((void*) inputs[0], input_sizes, [](void*){}, tensor_options); - at::Tensor output = at::from_blob(outputs[0], input_sizes, [](void*){}, tensor_options); + at::Tensor input = at::from_blob((void*) inputs[0], batch_input_sizes, [](void*){}, tensor_options); + at::Tensor output = at::from_blob(outputs[0], batch_output_sizes, [](void*){}, tensor_options); // create new torch cuda stream at::cuda::CUDAStream torch_stream = at::cuda::getStreamFromPool(); @@ -107,7 +108,7 @@ class interpolate_plugin : public IPluginV2 { // enqueue work if (message.mode() == "bilinear") { - at::upsample_bilinear2d_out(output, input, {output_sizes[2], output_sizes[3]}, message.align_corners()); + at::upsample_bilinear2d_out(output, input, {message.size(0), message.size(1)}, message.align_corners()); } // capture event on enqueued stream @@ -177,10 +178,3 @@ class interpolate_PluginCreator : public IPluginCreator { REGISTER_TENSORRT_PLUGIN(interpolate_PluginCreator); } - -int main() { - - interpolate_message m; - std::cout << m.size_size() << std::endl; - return 0; -} From 17627ee2900792e6199adcd8e9eef84a019f5d2c Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 25 Jun 2019 18:23:00 -0400 Subject: [PATCH 123/355] added build script --- build.py | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 build.py diff --git a/build.py b/build.py new file mode 100644 index 00000000..8f725136 --- /dev/null +++ b/build.py @@ -0,0 +1,53 @@ +import imp +import os +from string import Template + +PLUGINS = [ + 'interpolate', +] + +BASE_FOLDER = 'torch2trt/converters' + +NINJA_STR = Template( +""" +rule link + command = g++ -shared -o $$out $$in -L$torch_dir/lib -L$cuda_dir/lib64 -lc10 -lc10_cuda -ltorch -lcudart -lcaffe2 -lcaffe2_gpu -lprotobuf -lprotobuf-lite -pthread -lpthread -lnvinfer + +rule protoc + command = protoc $$in --cpp_out=. --python_out=. + +rule cxx + command = g++ -c -fPIC $$in -I$cuda_dir/include -I$torch_dir/include -I$torch_dir/include/torch/csrc/api/include -I. 
+
+"""
+).substitute({
+    'torch_dir': imp.find_module('torch')[1],
+    'cuda_dir': '/usr/local/cuda'
+})
+
+PLUGIN_TEMPLATE = Template(
+"""
+build $plugin_dir/$plugin.pb.h $plugin_dir/$plugin.pb.cc $plugin_dir/${plugin}_pb2.py: protoc $plugin_dir/$plugin.proto
+build $plugin.pb.o $plugin.o: cxx $plugin_dir/$plugin.pb.cc $plugin_dir/$plugin.cpp
+"""
+)
+
+
+if __name__ == '__main__':
+    plugin_o_files = []
+    for plugin in PLUGINS:
+        NINJA_STR += \
+            PLUGIN_TEMPLATE.substitute({
+                'plugin': plugin,
+                'plugin_dir': os.path.join(BASE_FOLDER, plugin),
+            })
+        plugin_o_files += [plugin + '.pb.o', plugin + '.o']
+
+    NINJA_STR += Template(
+"""
+build torch2trt.so: link $o_files
+"""
+    ).substitute({'o_files': ' '.join(plugin_o_files)})
+
+    with open('build.ninja', 'w') as f:
+        f.write(NINJA_STR)
From 7d2bdf02ce1f5b78f77a8209f7779fcfe8f96037 Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Tue, 25 Jun 2019 18:43:02 -0400
Subject: [PATCH 124/355] plugins

---
 build.py              | 11 +++++++++--
 setup.py              |  7 +++++++
 torch2trt/__init__.py | 15 ++++++++++++++-
 3 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/build.py b/build.py
index 8f725136..a2997dd5 100644
--- a/build.py
+++ b/build.py
@@ -1,4 +1,5 @@
 import imp
+import subprocess
 import os
 from string import Template
@@ -33,7 +34,8 @@
 )
 
 
-if __name__ == '__main__':
+def build():
+    global PLUGINS, BASE_FOLDER, NINJA_STR, PLUGIN_TEMPLATE
     plugin_o_files = []
     for plugin in PLUGINS:
         NINJA_STR += \
@@ -45,9 +47,14 @@
 
     NINJA_STR += Template(
 """
-build torch2trt.so: link $o_files
+build torch2trt/libtorch2trt.so: link $o_files
 """
     ).substitute({'o_files': ' '.join(plugin_o_files)})
 
     with open('build.ninja', 'w') as f:
         f.write(NINJA_STR)
+
+    subprocess.call(['ninja'])
+
+if __name__ == '__main__':
+    build()
diff --git a/setup.py b/setup.py
index e65ffb2d..f5b413c5 100644
--- a/setup.py
+++ b/setup.py
@@ -1,8 +1,15 @@
 from setuptools import setup, find_packages
+from build import build
+
+try:
+    build()
+except Exception:
+    print('Could not build plugins')
 
 setup(
     name='torch2trt',
     version='0.0',
     description='PyTorch to TensorRT converter',
     packages=find_packages(),
+    package_data={'torch2trt': ['libtorch2trt.so']}
 )
diff --git a/torch2trt/__init__.py b/torch2trt/__init__.py
index 23eac8db..affc5dbf 100644
--- a/torch2trt/__init__.py
+++ b/torch2trt/__init__.py
@@ -1,2 +1,15 @@
 from .torch2trt import *
-from .converters import *
\ No newline at end of file
+from .converters import *
+
+
+def load_plugins():
+    import os
+    import ctypes
+    ctypes.CDLL(os.path.join(os.path.dirname(__file__), 'libtorch2trt.so'))
+
+
+try:
+    load_plugins()
+    PLUGINS_LOADED = True
+except Exception:
+    PLUGINS_LOADED = False
From ec4a467bbc00604475c0ea1dba488c06e360d34e Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Tue, 25 Jun 2019 19:54:02 -0400
Subject: [PATCH 125/355] added interpolate

---
 build.py                                         | 3 ++-
 torch2trt/converters/interpolate/interpolate.cpp | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/build.py b/build.py
index a2997dd5..68b103b1 100644
--- a/build.py
+++ b/build.py
@@ -29,7 +29,8 @@
 PLUGIN_TEMPLATE = Template(
 """
 build $plugin_dir/$plugin.pb.h $plugin_dir/$plugin.pb.cc $plugin_dir/${plugin}_pb2.py: protoc $plugin_dir/$plugin.proto
-build $plugin.pb.o $plugin.o: cxx $plugin_dir/$plugin.pb.cc $plugin_dir/$plugin.cpp
+build $plugin.pb.o: cxx $plugin_dir/$plugin.pb.cc
+build $plugin.o: cxx $plugin_dir/$plugin.cpp
 """
 )
diff --git a/torch2trt/converters/interpolate/interpolate.cpp b/torch2trt/converters/interpolate/interpolate.cpp
index 121b7bc1..5fe4e992 100644
--- a/torch2trt/converters/interpolate/interpolate.cpp +++ b/torch2trt/converters/interpolate/interpolate.cpp @@ -109,6 +109,7 @@ class interpolate_plugin : public IPluginV2 { // enqueue work if (message.mode() == "bilinear") { at::upsample_bilinear2d_out(output, input, {message.size(0), message.size(1)}, message.align_corners()); + } else if (message.mode() == "nearest") { } // capture event on enqueued stream From 958d30e76600c36c6ffa981f763fae415ec1bc6b Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 25 Jun 2019 20:01:01 -0400 Subject: [PATCH 126/355] interpolate --- build.py | 3 +- torch2trt/converters/__init__.py | 1 + torch2trt/converters/interpolate/build.ninja | 13 ---- .../converters/interpolate/interpolate.cpp | 18 +++-- .../converters/interpolate/interpolate.proto | 4 +- .../converters/interpolate/interpolate.py | 69 +++++++++++++++++++ 6 files changed, 87 insertions(+), 21 deletions(-) delete mode 100644 torch2trt/converters/interpolate/build.ninja diff --git a/build.py b/build.py index a2997dd5..68b103b1 100644 --- a/build.py +++ b/build.py @@ -29,7 +29,8 @@ PLUGIN_TEMPLATE = Template( """ build $plugin_dir/$plugin.pb.h $plugin_dir/$plugin.pb.cc $plugin_dir/${plugin}_pb2.py: protoc $plugin_dir/$plugin.proto -build $plugin.pb.o $plugin.o: cxx $plugin_dir/$plugin.pb.cc $plugin_dir/$plugin.cpp +build $plugin.pb.o: cxx $plugin_dir/$plugin.pb.cc +build $plugin.o: cxx $plugin_dir/$plugin.cpp """ ) diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index a9e91b15..cb80d58d 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -9,6 +9,7 @@ from .ConvTranspose2d import * from .identity import * from .Identity import * +from .interpolate import * from .Linear import * from .LogSoftmax import * from .MaxPool2d import * diff --git a/torch2trt/converters/interpolate/build.ninja b/torch2trt/converters/interpolate/build.ninja deleted file mode 100644 index 021b6b53..00000000 --- a/torch2trt/converters/interpolate/build.ninja +++ /dev/null @@ -1,13 +0,0 @@ -plugin = interpolate -dir = . 
-torch_dir = /usr/local/lib/python3.6/dist-packages/torch -cuda_dir = /usr/local/cuda - -rule protoc - command = protoc $in --cpp_out=$dir --python_out=$dir $cflags - -rule cxx - command = g++ -shared -fPIC $in -o $out -I$cuda_dir/include -I$torch_dir/include -I$torch_dir/include/torch/csrc/api/include -L$torch_dir/lib -L$cuda_dir/lib64 -lc10 -lc10_cuda -ltorch -lcudart -lcaffe2 -lcaffe2_gpu -lprotobuf -lprotobuf-lite -pthread -lpthread -lnvinfer - -build $plugin.pb.h $plugin.pb.cc ${plugin}_pb2.py: protoc $plugin.proto -build $plugin.so: cxx $plugin.pb.cc $plugin.cpp diff --git a/torch2trt/converters/interpolate/interpolate.cpp b/torch2trt/converters/interpolate/interpolate.cpp index 121b7bc1..6f68846d 100644 --- a/torch2trt/converters/interpolate/interpolate.cpp +++ b/torch2trt/converters/interpolate/interpolate.cpp @@ -13,15 +13,15 @@ using namespace nvinfer1; namespace torch2trt { -class interpolate_plugin : public IPluginV2 { +class interpolate_Plugin : public IPluginV2 { private: - interpolate_message message; + interpolate_Message message; at::TensorOptions tensor_options; std::vector input_sizes; std::vector output_sizes; public: - interpolate_plugin(interpolate_message message) : message(message) {} + interpolate_Plugin(interpolate_Message message) : message(message) {} const char* getPluginType() const override { return "interpolate"; @@ -109,6 +109,12 @@ class interpolate_plugin : public IPluginV2 { // enqueue work if (message.mode() == "bilinear") { at::upsample_bilinear2d_out(output, input, {message.size(0), message.size(1)}, message.align_corners()); + } else if (message.mode() == "nearest") { + at::upsample_nearest2d_out(output, input, {message.size(0), message.size(1)}); + } else if (message.mode() == "area") { + at::adaptive_avg_pool2d_out(output, input, {message.size(0), message.size(1)}); + } else if (message.mode() == "bicubic") { + at::upsample_bicubic2d_out(output, input, {message.size(0), message.size(1)}, message.align_corners()); } // capture event on enqueued stream @@ -135,7 +141,7 @@ class interpolate_plugin : public IPluginV2 { void destroy() override {} IPluginV2* clone() const override { - return new interpolate_plugin(message); + return new interpolate_Plugin(message); } void setPluginNamespace(const char* pluginNamespace) override {} @@ -163,9 +169,9 @@ class interpolate_PluginCreator : public IPluginCreator { } IPluginV2 *deserializePlugin(const char *name, const void *data, size_t length) override { - interpolate_message message; + interpolate_Message message; message.ParseFromArray(data, length); - return new interpolate_plugin(message); + return new interpolate_Plugin(message); } void setPluginNamespace(const char *N) override {} diff --git a/torch2trt/converters/interpolate/interpolate.proto b/torch2trt/converters/interpolate/interpolate.proto index 7319f650..b91db045 100644 --- a/torch2trt/converters/interpolate/interpolate.proto +++ b/torch2trt/converters/interpolate/interpolate.proto @@ -1,7 +1,9 @@ syntax = "proto3"; -message interpolate_message { +package torch2trt; + +message interpolate_Message { repeated int64 size = 1; string mode = 2; bool align_corners = 3; diff --git a/torch2trt/converters/interpolate/interpolate.py b/torch2trt/converters/interpolate/interpolate.py index e69de29b..cdf763b7 100644 --- a/torch2trt/converters/interpolate/interpolate.py +++ b/torch2trt/converters/interpolate/interpolate.py @@ -0,0 +1,69 @@ +import tensorrt as trt +import torch.nn.functional as F +from torch2trt.torch2trt import * +from torch2trt.module_test 
import add_module_test +from .interpolate_pb2 import interpolate_Message + + +def get_interpolate_plugin(size, mode, align_corners): + PLUGIN_NAME = 'interpolate' + registry = trt.get_plugin_registry() + creator = [c for c in registry.plugin_creator_list if c.name == PLUGIN_NAME and c.plugin_namespace == 'torch2trt'][0] + message = interpolate_Message(size=size, mode=mode, align_corners=align_corners) + return creator.deserialize_plugin(PLUGIN_NAME, message.SerializeToString()) + + +@tensorrt_converter('torch.nn.functional.interpolate') +def convert_interpolate(ctx): + input = ctx.method_args[0] + output = ctx.method_return + + try: + mode = ctx.method_kwargs['mode'] + except KeyError: + mode = 'nearest' + + try: + align_corners = ctx.method_kwargs['align_corners'] + except KeyError: + align_corners = False + + # currently only works for NCHW + size = list(output.shape[2:]) + + plugin = get_interpolate_plugin(size=size, mode=mode, align_corners=align_corners) + + layer = ctx.network.add_plugin_v2([input._trt], plugin) + + output._trt = layer.get_output(0) + + +class Interpolate(torch.nn.Module): + def __init__(self, size, mode, align_corners): + super(Interpolate, self).__init__() + self.size = size + self.mode = mode + self.align_corners = align_corners + + def forward(self, x): + return F.interpolate(x, self.size, mode=self.mode, align_corners=self.align_corners) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)]) +def test_interpolate_nearest(): + return Interpolate((224, 224), 'nearest', None) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)]) +def test_interpolate_bilinear(): + return Interpolate((224, 224), 'bilinear', False) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)]) +def test_interpolate_bicubic(): + return Interpolate((224, 224), 'bicubic', False) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)]) +def test_interpolate_area(): + return Interpolate((56, 56), 'area', None) From 2a4f28b63b0677b16aa36a94eef945ec1dfe88b4 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 25 Jun 2019 20:04:46 -0400 Subject: [PATCH 127/355] added missing __init__ --- torch2trt/converters/interpolate/__init__.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 torch2trt/converters/interpolate/__init__.py diff --git a/torch2trt/converters/interpolate/__init__.py b/torch2trt/converters/interpolate/__init__.py new file mode 100644 index 00000000..ff29a907 --- /dev/null +++ b/torch2trt/converters/interpolate/__init__.py @@ -0,0 +1 @@ +from .interpolate import * From 1687f5e8217c8f525247dabafe80df7361ff6b79 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 25 Jun 2019 21:34:56 -0400 Subject: [PATCH 128/355] added segmentation demo --- notebooks/image_segmentation/conversion.ipynb | 226 ++++++++++++++++++ 1 file changed, 226 insertions(+) create mode 100644 notebooks/image_segmentation/conversion.ipynb diff --git a/notebooks/image_segmentation/conversion.ipynb b/notebooks/image_segmentation/conversion.ipynb new file mode 100644 index 00000000..077c53ee --- /dev/null +++ b/notebooks/image_segmentation/conversion.ipynb @@ -0,0 +1,226 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import torchvision\n", + "import torch2trt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = 
torchvision.models.segmentation.deeplabv3_resnet101(pretrained=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = model.cuda().eval().half()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class ModelWrapper(torch.nn.Module):\n", + " def __init__(self, model):\n", + " super(ModelWrapper, self).__init__()\n", + " self.model = model\n", + " def forward(self, x):\n", + " return model(x)['out']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model_w = ModelWrapper(model).half()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data = torch.ones((1, 3, 224, 224)).cuda().half()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model_trt = torch2trt.torch2trt(model_w, [data], fp16_mode=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Live demo" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# from jetcam.csi_camera import CSICamera\n", + "from jetcam.usb_camera import USBCamera\n", + "\n", + "# camera = CSICamera(width=224, height=224)\n", + "camera = USBCamera(width=224, height=224)\n", + "\n", + "camera.running = True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from jetcam.utils import bgr8_to_jpeg\n", + "import traitlets\n", + "import ipywidgets\n", + "\n", + "image_w = ipywidgets.Image()\n", + "\n", + "traitlets.dlink((camera, 'value'), (image_w, 'value'), transform=bgr8_to_jpeg)\n", + "\n", + "display(image_w)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import cv2\n", + "import numpy as np\n", + "import torchvision\n", + "\n", + "device = torch.device('cuda')\n", + "mean = 255.0 * np.array([0.485, 0.456, 0.406])\n", + "stdev = 255.0 * np.array([0.229, 0.224, 0.225])\n", + "\n", + "normalize = torchvision.transforms.Normalize(mean, stdev)\n", + "\n", + "def preprocess(camera_value):\n", + " global device, normalize\n", + " x = camera_value\n", + " x = cv2.cvtColor(x, cv2.COLOR_BGR2RGB)\n", + " x = x.transpose((2, 0, 1))\n", + " x = torch.from_numpy(x).float()\n", + " x = normalize(x)\n", + " x = x.to(device)\n", + " x = x[None, ...]\n", + " return x" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "seg_image = ipywidgets.Image()\n", + "\n", + "display(seg_image)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def execute(change):\n", + " image = change['new']\n", + " output = model_trt(preprocess(camera.value).half())[0].detach().cpu().float().numpy()\n", + " mask = 1.0 * (output.argmax(0) == 15)\n", + " seg_image.value = bgr8_to_jpeg(mask[:, :, None] * image)\n", + " \n", + " \n", + "mask = execute({'new': camera.value})\n", + "# camera.observe(execute, names='value')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "camera.observe(execute, names='value')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "camera.unobserve(execute, names='value')" + 
] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "\n", + "torch.cuda.current_stream().synchronize()\n", + "t0 = time.time()\n", + "for i in range(100):\n", + " output = model_w(preprocess(camera.value).half())\n", + "torch.cuda.current_stream().synchronize()\n", + "t1 = time.time()\n", + "\n", + "print(100.0 / (t1 - t0))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 3d763779eaf24e76d066060ee180f0b4e3ff8dc3 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 27 Jun 2019 15:39:49 -0400 Subject: [PATCH 129/355] optional plugin building --- setup.py | 20 +++++++++++++------- torch2trt/__init__.py | 2 +- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/setup.py b/setup.py index f5b413c5..f52f13b1 100644 --- a/setup.py +++ b/setup.py @@ -1,15 +1,21 @@ +import sys from setuptools import setup, find_packages from build import build -try: - build() -except e: - print('Could not build plugins') +package_data = {} + +if '--plugins' in sys.argv: + sys.argv.remove('--plugins') + try: + build() + package_data['torch2trt'] = ['libtorch2trt.so'] + except RuntimeError: + print('Could not build plugins') setup( name='torch2trt', - version='0.0', - description='PyTorch to TensorRT converter', + version='0.0.0', + description='An easy to use PyTorch to TensorRT converter', packages=find_packages(), - package_data={'torch2trt': ['libtorch2trt.so']} + package_data=package_data ) diff --git a/torch2trt/__init__.py b/torch2trt/__init__.py index affc5dbf..55802b51 100644 --- a/torch2trt/__init__.py +++ b/torch2trt/__init__.py @@ -11,5 +11,5 @@ def load_plugins(): try: load_plugins() PLUGINS_LOADED = True -except e: +except RuntimeError: PLUGINS_LOADED = False From 610df6b5caca714f63ccf70c598b35cbca6c869f Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 27 Jun 2019 15:50:06 -0400 Subject: [PATCH 130/355] fixed plugin check --- setup.py | 7 ++----- torch2trt/__init__.py | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/setup.py b/setup.py index f52f13b1..87a1b024 100644 --- a/setup.py +++ b/setup.py @@ -6,11 +6,8 @@ if '--plugins' in sys.argv: sys.argv.remove('--plugins') - try: - build() - package_data['torch2trt'] = ['libtorch2trt.so'] - except RuntimeError: - print('Could not build plugins') + build() + package_data['torch2trt'] = ['libtorch2trt.so'] setup( name='torch2trt', diff --git a/torch2trt/__init__.py b/torch2trt/__init__.py index 55802b51..00efdda6 100644 --- a/torch2trt/__init__.py +++ b/torch2trt/__init__.py @@ -11,5 +11,5 @@ def load_plugins(): try: load_plugins() PLUGINS_LOADED = True -except RuntimeError: +except OSError: PLUGINS_LOADED = False From 159475de2237da70a2cd3ec5c144621101d0fe41 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 27 Jun 2019 16:19:58 -0400 Subject: [PATCH 131/355] updated readme --- README.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/README.md b/README.md index d212862c..35a315cf 100644 --- a/README.md +++ b/README.md @@ -14,12 +14,29 @@ If you find an issue, please [let us know](../..//issues)! 
## Setup
 
+### Option 1 - Without plugins
+
+To install without compiling plugins, call the following
+
 ```bash
 git clone https://github.com/NVIDIA-AI-IOT/torch2trt
 cd torch2trt
 sudo python setup.py install
 ```
 
+### Option 2 - With plugins (experimental)
+
+To install with plugins to support some operations in PyTorch that are not natively supported with TensorRT, call the following
+
+> This is *experimental* and currently only includes a plugin for ``torch.nn.functional.interpolate``
+
+```bash
+sudo apt-get install libprotobuf* protobuf-compiler ninja-build
+git clone https://github.com/NVIDIA-AI-IOT/torch2trt
+cd torch2trt
+sudo python setup.py install --plugins
+```
+
 > torch2trt is tested against a system configured with the [JetCard](http://github.com/NVIDIA-AI-IOT/jetcard) setup. Different system configurations may require additional steps.
 
 ## Usage

From 302c0ec919bf8d5d264cd8614d171693c42df130 Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Thu, 27 Jun 2019 16:21:27 -0400
Subject: [PATCH 132/355] readme

---
 README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 35a315cf..ad3572f7 100644
--- a/README.md
+++ b/README.md
@@ -28,7 +28,7 @@ sudo python setup.py install
 
 To install with plugins to support some operations in PyTorch that are not natively supported with TensorRT, call the following
 
-> This is *experimental* and currently only includes a plugin for ``torch.nn.functional.interpolate``
+> This currently only includes a plugin for ``torch.nn.functional.interpolate``
 
 ```bash
 sudo apt-get install libprotobuf* protobuf-compiler ninja-build
@@ -61,13 +61,13 @@ model_trt = torch2trt(model, [x])
 
 ### Execute
 
-We can execute returned ``TRTModule`` just like the original PyTorch model
+We can execute the returned ``TRTModule`` just like the original PyTorch model
 
 ```python
 y = model(x)
 y_trt = model_trt(x)
 
-# check the output against
+# check the output against PyTorch
 print(torch.max(torch.abs(y - y_trt)))
 ```
 
From 8d1a79952ea360e0118ddd98c2676529f2195162 Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Thu, 27 Jun 2019 20:24:11 -0400
Subject: [PATCH 133/355] added transpose

---
 torch2trt/converters/__init__.py | 1 +
 torch2trt/converters/identity.py | 1 +
 torch2trt/converters/transpose.py | 32 +++++++++++++++++++++++++++++++
 torch2trt/torch2trt.py | 31 ++++++++++++++++++++-----------
 4 files changed, 55 insertions(+), 10 deletions(-)
 create mode 100644 torch2trt/converters/transpose.py

diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py
index cb80d58d..6382fbd7 100644
--- a/torch2trt/converters/__init__.py
+++ b/torch2trt/converters/__init__.py
@@ -18,3 +18,4 @@
 from .relu6 import *
 from .ReLU6 import *
 from .view import *
+from .transpose import *
diff --git a/torch2trt/converters/identity.py b/torch2trt/converters/identity.py
index 2f5f9f05..2b86f138 100644
--- a/torch2trt/converters/identity.py
+++ b/torch2trt/converters/identity.py
@@ -1,6 +1,7 @@
 from torch2trt.torch2trt import *
 
 
+@tensorrt_converter('torch.Tensor.contiguous')
 @tensorrt_converter('torch.nn.functional.dropout')
 @tensorrt_converter('torch.nn.functional.dropout2d')
 @tensorrt_converter('torch.nn.functional.dropout3d')
diff --git a/torch2trt/converters/transpose.py b/torch2trt/converters/transpose.py
new file mode 100644
index 00000000..3c93fd4b
--- /dev/null
+++ b/torch2trt/converters/transpose.py
@@ -0,0 +1,32 @@
+from torch2trt.torch2trt import *
+from torch2trt.module_test import add_module_test
+
+
+@tensorrt_converter('torch.transpose') +def convert_transpose(ctx): + input = ctx.method_args[0] + output = ctx.method_return + # permutation -1 because TRT does not include batch dim + permutation = list(range(len(input.shape) - 1)) + dim0 = ctx.method_args[1] - 1 + dim1 = ctx.method_args[2] - 1 + permutation[dim0] = dim1 + permutation[dim1] = dim0 + layer = ctx.network.add_shuffle(input._trt) + layer.second_transpose = tuple(permutation) + output._trt = layer.get_output(0) + + +class Transpose(torch.nn.Module): + def __init__(self, dim0, dim1): + super(Transpose, self).__init__() + self.dim0 = dim0 + self.dim1 = dim1 + def forward(self, x): + return torch.transpose(x, self.dim0, self.dim1).contiguous() + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +def test_transpose_12(): + return Transpose(1, 2) diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index ede57d01..9b19e25d 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -77,20 +77,30 @@ def attach_converter(ctx, method, converter): """Gets a function that executes PyTorch method and TensorRT converter""" def wrapper(*args, **kwargs): + skip = True + + # check if another (parent) converter has lock + if not ctx.lock: + ctx.lock = True + skip = False + # run original method outputs = method(*args, **kwargs) - # call conversion hook - ctx.method_args = args - ctx.method_kwargs = kwargs - ctx.method_return = outputs - #print('%s : %s' % (method.__qualname__, converter.__name__)) - converter(ctx) + if not skip: + # call conversion hook + ctx.method_args = args + ctx.method_kwargs = kwargs + ctx.method_return = outputs + + #print('%s : %s' % (method.__qualname__, converter.__name__)) + converter(ctx) - # convert to None so conversion will fail for unsupported layers - ctx.method_args = None - ctx.method_kwargs = None - ctx.method_return = None + # convert to None so conversion will fail for unsupported layers + ctx.method_args = None + ctx.method_kwargs = None + ctx.method_return = None + ctx.lock = False return outputs @@ -119,6 +129,7 @@ def __exit__(self, type, val, tb): class ConversionContext(object): def __init__(self, network, converters=CONVERTERS): self.network = network + self.lock = False self.method_args = None self.method_kwargs = None self.method_return = None From abbc465f1cf8fc94e0f167a9ea81f90582b64e07 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Fri, 28 Jun 2019 18:17:42 -0400 Subject: [PATCH 134/355] added mean --- torch2trt/converters/__init__.py | 1 + torch2trt/converters/mean.py | 64 ++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 torch2trt/converters/mean.py diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index 6382fbd7..7bf3d147 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -19,3 +19,4 @@ from .ReLU6 import * from .view import * from .transpose import * +from .mean import * \ No newline at end of file diff --git a/torch2trt/converters/mean.py b/torch2trt/converters/mean.py new file mode 100644 index 00000000..d662c44c --- /dev/null +++ b/torch2trt/converters/mean.py @@ -0,0 +1,64 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +@tensorrt_converter('torch.mean') +@tensorrt_converter('torch.Tensor.mean') +def convert_mean(ctx): + input = ctx.method_args[0] + output = ctx.method_return + + # get dims from args or kwargs + if 'dim' in 
ctx.method_kwargs: + dim = ctx.method_kwargs['dim'] + elif len(ctx.method_args) >= 2: + dim = ctx.method_args[1] + + # convert dim to tuple + if not isinstance(dim, tuple): + dim = (dim, ) + + # create axes bitmask for reduce layer + axes = 0 + for d in dim: + axes |= 1 << (d - 1) # -1 to remove batch dimension + + # get whether to keep dimensions + if 'keepdim' in ctx.method_kwargs: + keep_dims = ctx.method_kwargs['keepdim'] + elif len(ctx.method_args) == 3: + keep_dims = ctx.method_args[2] + else: + keep_dims = False + + layer = ctx.network.add_reduce(input._trt, trt.ReduceOperation.AVG, axes, keep_dims) + output._trt = layer.get_output(0) + + +class Mean(torch.nn.Module): + def __init__(self, dim, keepdim): + super(Mean, self).__init__() + self.dim = dim + self.keepdim = keepdim + def forward(self, x): + return x.mean(self.dim, self.keepdim) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +def test_mean_channel(): + return Mean(1, False) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +def test_mean_tuple(): + return Mean((1, 2), False) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +def test_mean_keepdim(): + return Mean(1, True) \ No newline at end of file From 909df62ce1d0cbe97afa90b72ba4a567cb03a870 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Fri, 28 Jun 2019 18:32:22 -0400 Subject: [PATCH 135/355] added support for dims as list --- torch2trt/converters/mean.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/torch2trt/converters/mean.py b/torch2trt/converters/mean.py index d662c44c..9e34e2d8 100644 --- a/torch2trt/converters/mean.py +++ b/torch2trt/converters/mean.py @@ -14,7 +14,10 @@ def convert_mean(ctx): elif len(ctx.method_args) >= 2: dim = ctx.method_args[1] - # convert dim to tuple + # convert list to tuple + if isinstance(dim, list): + dim = tuple(dim) + if not isinstance(dim, tuple): dim = (dim, ) From 7045e5001ade176ec07d2a48eeaa5d7b4e9d6e3f Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sat, 29 Jun 2019 03:02:38 -0700 Subject: [PATCH 136/355] plugin hotfix --- torch2trt/converters/__init__.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index 7bf3d147..5362b5a0 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -9,7 +9,6 @@ from .ConvTranspose2d import * from .identity import * from .Identity import * -from .interpolate import * from .Linear import * from .LogSoftmax import * from .MaxPool2d import * @@ -19,4 +18,9 @@ from .ReLU6 import * from .view import * from .transpose import * -from .mean import * \ No newline at end of file +from .mean import * + +try: + from .interpolate import * +except: + pass \ No newline at end of file From b7e87ca3f1701ee37c2c64bbce3cb10e2d71dccf Mon Sep 17 00:00:00 2001 From: John Date: Sat, 29 Jun 2019 05:09:35 -0700 Subject: [PATCH 137/355] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index ad3572f7..7c0a14bd 100644 --- a/README.md +++ b/README.md @@ -168,5 +168,6 @@ Please see [this folder](torch2trt/converters) for 
more examples. - [JetBot](http://github.com/NVIDIA-AI-IOT/jetbot) - An educational AI robot based on NVIDIA Jetson Nano +- [JetRacer](http://github.com/NVIDIA-AI-IOT/jetracer) - An educational AI racecar using NVIDIA Jetson Nano - [JetCam](http://github.com/NVIDIA-AI-IOT/jetcam) - An easy to use Python camera interface for NVIDIA Jetson - [JetCard](http://github.com/NVIDIA-AI-IOT/jetcard) - An SD card image for web programming AI projects with NVIDIA Jetson Nano From 6ad77e759eac2eb0760c5529515f3e1e629b50f7 Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Mon, 1 Jul 2019 12:15:01 -0700 Subject: [PATCH 138/355] Missing an import in the first example Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 7c0a14bd..83769c91 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,7 @@ Below are some usage examples, for more check out the [notebooks](notebooks). ### Convert ```python +import torch from torch2trt import torch2trt from torchvision.models.alexnet import alexnet From 2379ded3ea7598c40b0d1486daedd6638d3838d4 Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Tue, 2 Jul 2019 13:47:55 -0700 Subject: [PATCH 139/355] Adds a gitignore so that files generated by build.py and protobuf do not get tracked Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- .gitignore | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..03192ad5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +.ninja_deps +.ninja_log +build.ninja +*.o +*.pb.o +torch2trt.egg-info +build/ +dist/ +__pycache__/ +*.so +*.pb.h +*.pb.cc +*_pb2.py \ No newline at end of file From c26611254ecbe506aca2407ec0dc82c64a2d561d Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Mon, 1 Jul 2019 17:23:32 -0700 Subject: [PATCH 140/355] Adding the ability to specify the location of various dependency libraries (CUDA, PyTorch and TensorRT) when invoking setup.py Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- build.py | 25 +++++++++++++++---------- setup.py | 43 +++++++++++++++++++++++++++++++++++++++---- 2 files changed, 54 insertions(+), 14 deletions(-) diff --git a/build.py b/build.py index 68b103b1..0dca336d 100644 --- a/build.py +++ b/build.py @@ -9,22 +9,19 @@ BASE_FOLDER = 'torch2trt/converters' -NINJA_STR = Template( +NINJA_TEMPLATE = Template( """ rule link - command = g++ -shared -o $$out $$in -L$torch_dir/lib -L$cuda_dir/lib64 -lc10 -lc10_cuda -ltorch -lcudart -lcaffe2 -lcaffe2_gpu -lprotobuf -lprotobuf-lite -pthread -lpthread -lnvinfer + command = g++ -shared -o $$out $$in -L$torch_dir/lib -L$cuda_dir/lib64 -L$trt_lib_dir -lc10 -lc10_cuda -ltorch -lcudart -lcaffe2 -lcaffe2_gpu -lprotobuf -lprotobuf-lite -pthread -lpthread -lnvinfer rule protoc command = protoc $$in --cpp_out=. --python_out=. rule cxx - command = g++ -c -fPIC $$in -I$cuda_dir/include -I$torch_dir/include -I$torch_dir/include/torch/csrc/api/include -I. + command = g++ -c -fPIC $$in -I$cuda_dir/include -I$torch_dir/include -I$torch_dir/include/torch/csrc/api/include -I. 
-std=c++11 -I$trt_inc_dir """ -).substitute({ - 'torch_dir': imp.find_module('torch')[1], - 'cuda_dir': '/usr/local/cuda' -}) +) PLUGIN_TEMPLATE = Template( """ @@ -35,8 +32,16 @@ ) -def build(): - global PLUGINS, BASE_FOLDER, NINJA_STR, PLUGIN_TEMPLATE +def build(cuda_dir="/usr/local/cuda", torch_dir=imp.find_module('torch')[1], trt_inc_dir="/usr/include/x86_64-linux-gnu", trt_lib_dir="/usr/lib/x86_64-linux-gnu"): + global PLUGINS, BASE_FOLDER, NINJA_TEMPLATE, PLUGIN_TEMPLATE + + NINJA_STR = NINJA_TEMPLATE.substitute({ + 'torch_dir': torch_dir, + 'cuda_dir': cuda_dir, + 'trt_inc_dir': trt_inc_dir, + 'trt_lib_dir': trt_lib_dir, + }) + plugin_o_files = [] for plugin in PLUGINS: NINJA_STR += \ @@ -45,7 +50,7 @@ def build(): 'plugin_dir': os.path.join(BASE_FOLDER, plugin), }) plugin_o_files += [plugin + '.pb.o', plugin + '.o'] - + NINJA_STR += Template( """ build torch2trt/libtorch2trt.so: link $o_files diff --git a/setup.py b/setup.py index 87a1b024..c338e870 100644 --- a/setup.py +++ b/setup.py @@ -1,18 +1,53 @@ import sys +import argparse from setuptools import setup, find_packages +from setuptools.command.install import install from build import build package_data = {} -if '--plugins' in sys.argv: - sys.argv.remove('--plugins') - build() - package_data['torch2trt'] = ['libtorch2trt.so'] +class InstallCommand(install): + description = "Builds plugins" + user_options = install.user_options + [ + ('plugins', None, 'Build plugins'), + ('cuda-dir=', None, 'Location of CUDA (if not default location)'), + ('torch-dir=', None, 'Location of PyTorch (if not default location)'), + ('trt-inc-dir=', None, 'Location of TensorRT include files (if not default location)'), + ('trt-lib-dir=', None, 'Location of TensorRT libraries (if not default location)'), + ] + def initialize_options(self): + install.initialize_options(self) + self.plugins = False + self.cuda_dir = None + self.torch_dir = None + self.trt_inc_dir = None + self.trt_lib_dir = None + def finalize_options(self): + install.finalize_options(self) + def run(self): + if self.plugins: + build_args = {} + if self.cuda_dir: + build_args['cuda_dir'] = self.cuda_dir + if self.torch_dir: + build_args['torch_dir'] = self.torch_dir + if self.trt_inc_dir: + build_args['trt_inc_dir'] = self.trt_inc_dir + if self.trt_lib_dir: + build_args['trt_lib_dir'] = self.trt_lib_dir + print('Building in plugin support') + build(**build_args) + package_data['torch2trt'] = ['libtorch2trt.so'] + install.run(self) + setup( name='torch2trt', version='0.0.0', description='An easy to use PyTorch to TensorRT converter', + cmdclass={ + 'install': InstallCommand, + }, packages=find_packages(), package_data=package_data ) From 9fd54f8eedec0ff5ccefb3ce0da9d753f60601a5 Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Tue, 2 Jul 2019 15:02:39 -0700 Subject: [PATCH 141/355] Adds a way to specify custom library locations and remove build artifacts adds `setup.py install` subcommands `--cuda-dir=` `--trt-inc-dir=` `--trt-lib-dir=` `--torch-dir=` adds a `setup.py clean` which will delete generated files moves `--plugin` into a custom install command with the other install subcommands Adds a -std=c++11 flag to the compile step (was hitting errors when trying to compile plugins) Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- build.py | 43 ++++++++++++++++++++++++------------------- setup.py | 52 ++++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 72 insertions(+), 23 deletions(-) diff --git a/build.py b/build.py index 0dca336d..715ec99e 100644 --- a/build.py 
+++ b/build.py @@ -10,31 +10,35 @@ BASE_FOLDER = 'torch2trt/converters' NINJA_TEMPLATE = Template( -""" -rule link - command = g++ -shared -o $$out $$in -L$torch_dir/lib -L$cuda_dir/lib64 -L$trt_lib_dir -lc10 -lc10_cuda -ltorch -lcudart -lcaffe2 -lcaffe2_gpu -lprotobuf -lprotobuf-lite -pthread -lpthread -lnvinfer + """ + rule link + command = g++ -shared -o $$out $$in -L$torch_dir/lib -L$cuda_dir/lib64 -L$trt_lib_dir -lc10 -lc10_cuda -ltorch -lcudart -lcaffe2 -lcaffe2_gpu -lprotobuf -lprotobuf-lite -pthread -lpthread -lnvinfer -rule protoc - command = protoc $$in --cpp_out=. --python_out=. + rule protoc + command = protoc $$in --cpp_out=. --python_out=. -rule cxx - command = g++ -c -fPIC $$in -I$cuda_dir/include -I$torch_dir/include -I$torch_dir/include/torch/csrc/api/include -I. -std=c++11 -I$trt_inc_dir + rule cxx + command = g++ -c -fPIC $$in -I$cuda_dir/include -I$torch_dir/include -I$torch_dir/include/torch/csrc/api/include -I. -std=c++11 -I$trt_inc_dir -""" + """ ) PLUGIN_TEMPLATE = Template( -""" -build $plugin_dir/$plugin.pb.h $plugin_dir/$plugin.pb.cc $plugin_dir/${plugin}_pb2.py: protoc $plugin_dir/$plugin.proto -build $plugin.pb.o: cxx $plugin_dir/$plugin.pb.cc -build $plugin.o: cxx $plugin_dir/$plugin.cpp -""" + """ + build $plugin_dir/$plugin.pb.h $plugin_dir/$plugin.pb.cc $plugin_dir/${plugin}_pb2.py: protoc $plugin_dir/$plugin.proto + build $plugin.pb.o: cxx $plugin_dir/$plugin.pb.cc + build $plugin.o: cxx $plugin_dir/$plugin.cpp + """ ) -def build(cuda_dir="/usr/local/cuda", torch_dir=imp.find_module('torch')[1], trt_inc_dir="/usr/include/x86_64-linux-gnu", trt_lib_dir="/usr/lib/x86_64-linux-gnu"): +def build(cuda_dir="/usr/local/cuda", + torch_dir=imp.find_module('torch')[1], + trt_inc_dir="/usr/include/aarch64-linux-gnu", + trt_lib_dir="/usr/lib/aarch64-linux-gnu"): + global PLUGINS, BASE_FOLDER, NINJA_TEMPLATE, PLUGIN_TEMPLATE - + NINJA_STR = NINJA_TEMPLATE.substitute({ 'torch_dir': torch_dir, 'cuda_dir': cuda_dir, @@ -50,11 +54,11 @@ def build(cuda_dir="/usr/local/cuda", torch_dir=imp.find_module('torch')[1], trt 'plugin_dir': os.path.join(BASE_FOLDER, plugin), }) plugin_o_files += [plugin + '.pb.o', plugin + '.o'] - + NINJA_STR += Template( -""" -build torch2trt/libtorch2trt.so: link $o_files -""" + """ + build torch2trt/libtorch2trt.so: link $o_files + """ ).substitute({'o_files': ' '.join(plugin_o_files)}) with open('build.ninja', 'w') as f: @@ -62,5 +66,6 @@ def build(cuda_dir="/usr/local/cuda", torch_dir=imp.find_module('torch')[1], trt subprocess.call(['ninja']) + if __name__ == '__main__': build() diff --git a/setup.py b/setup.py index c338e870..b0d71936 100644 --- a/setup.py +++ b/setup.py @@ -1,13 +1,16 @@ -import sys -import argparse +import os +import glob +import shutil from setuptools import setup, find_packages from setuptools.command.install import install +from distutils.cmd import Command from build import build package_data = {} + class InstallCommand(install): - description = "Builds plugins" + description = "Builds the package" user_options = install.user_options + [ ('plugins', None, 'Build plugins'), ('cuda-dir=', None, 'Location of CUDA (if not default location)'), @@ -15,6 +18,7 @@ class InstallCommand(install): ('trt-inc-dir=', None, 'Location of TensorRT include files (if not default location)'), ('trt-lib-dir=', None, 'Location of TensorRT libraries (if not default location)'), ] + def initialize_options(self): install.initialize_options(self) self.plugins = False @@ -22,8 +26,10 @@ def initialize_options(self): self.torch_dir = None 
self.trt_inc_dir = None self.trt_lib_dir = None + def finalize_options(self): install.finalize_options(self) + def run(self): if self.plugins: build_args = {} @@ -40,13 +46,51 @@ def run(self): build(**build_args) package_data['torch2trt'] = ['libtorch2trt.so'] install.run(self) - + + +class CleanCommand(Command): + """Custom clean command to tidy up the project root.""" + PY_CLEAN_FILES = './build ./dist ./__pycache__ ./*.pyc ./*.tgz ./*.egg-info'.split(' ') + description = "Command to tidy up the project root" + user_options = [] + + def initialize_options(self): + pass + + def finalize_options(self): + pass + + def run(self): + root_dir = os.path.dirname(os.path.realpath(__file__)) + for path_spec in self.PY_CLEAN_FILES: + # Make paths absolute and relative to this path + abs_paths = glob.glob(os.path.normpath(os.path.join(root_dir, path_spec))) + for path in [str(p) for p in abs_paths]: + if not path.startswith(root_dir): + # Die if path in CLEAN_FILES is absolute + outside this directory + raise ValueError("%s is not a path inside %s" % (path, root_dir)) + print('removing %s' % os.path.relpath(path)) + shutil.rmtree(path) + + cmd_list = { + "Removing generated protobuf cc files": "find . -name '*.pb.cc' -print0 | xargs -0 rm -f;", + "Removing generated protobuf h files": "find . -name '*.pb.h' -print0 | xargs -0 rm -f;", + "Removing generated protobuf py files": "find . -name '*_pb2.py' -print0 | xargs -0 rm -f;", + "Removing generated ninja files": "find . -name '*.ninja*' -print0 | xargs -0 rm -f;" + } + + for cmd, script in cmd_list.items(): + print("Running {}".format(cmd)) + os.system(script) + + setup( name='torch2trt', version='0.0.0', description='An easy to use PyTorch to TensorRT converter', cmdclass={ 'install': InstallCommand, + 'clean': CleanCommand, }, packages=find_packages(), package_data=package_data From 2f7582e19fb62fab72d1b2faf1e1984ad9dfed7d Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 2 Jul 2019 21:21:29 -0400 Subject: [PATCH 142/355] added softmax --- torch2trt/converters/__init__.py | 3 ++- torch2trt/converters/softmax.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 torch2trt/converters/softmax.py diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index 5362b5a0..55144e00 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -19,8 +19,9 @@ from .view import * from .transpose import * from .mean import * +from .softmax import * try: from .interpolate import * except: - pass \ No newline at end of file + pass diff --git a/torch2trt/converters/softmax.py b/torch2trt/converters/softmax.py new file mode 100644 index 00000000..9d174fce --- /dev/null +++ b/torch2trt/converters/softmax.py @@ -0,0 +1,32 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +@tensorrt_converter('torch.nn.functional.softmax') +def convert_softmax(ctx): + input = ctx.method_args[0] + output = ctx.method_return + + # get dims from args or kwargs + if 'dim' in ctx.method_kwargs: + dim = ctx.method_kwargs['dim'] + elif len(ctx.method_args) >= 2: + dim = ctx.method_args[1] + + axes = 1 << (dim - 1) + + layer = ctx.network.add_softmax(input=input._trt) + layer.axes = axes + + output._trt = layer.get_output(0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +def test_softmax_module(): + return torch.nn.Softmax(1) + + 
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +def test_softmax_module_dim2(): + return torch.nn.Softmax(2) From 9ca3b3212131c29c1fac6460b08bb7043fb2bd86 Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Wed, 3 Jul 2019 15:21:42 -0700 Subject: [PATCH 143/355] Fixes some intedentation errors, adds same subcommands to develop There were some indentation errors with the template due to the linter this commit should resolve that. It also adds the same subcommands to `setup.py develop` that were added to `setup.py install` and unifies the implementation of the custom compilation between the two. Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- build.py | 43 +++++++++++---------------- setup.py | 90 +++++++++++++++++++++++++++++++++++++------------------- 2 files changed, 77 insertions(+), 56 deletions(-) diff --git a/build.py b/build.py index 715ec99e..f4528811 100644 --- a/build.py +++ b/build.py @@ -9,27 +9,20 @@ BASE_FOLDER = 'torch2trt/converters' -NINJA_TEMPLATE = Template( - """ - rule link - command = g++ -shared -o $$out $$in -L$torch_dir/lib -L$cuda_dir/lib64 -L$trt_lib_dir -lc10 -lc10_cuda -ltorch -lcudart -lcaffe2 -lcaffe2_gpu -lprotobuf -lprotobuf-lite -pthread -lpthread -lnvinfer - - rule protoc - command = protoc $$in --cpp_out=. --python_out=. - - rule cxx - command = g++ -c -fPIC $$in -I$cuda_dir/include -I$torch_dir/include -I$torch_dir/include/torch/csrc/api/include -I. -std=c++11 -I$trt_inc_dir - - """ -) - -PLUGIN_TEMPLATE = Template( - """ - build $plugin_dir/$plugin.pb.h $plugin_dir/$plugin.pb.cc $plugin_dir/${plugin}_pb2.py: protoc $plugin_dir/$plugin.proto - build $plugin.pb.o: cxx $plugin_dir/$plugin.pb.cc - build $plugin.o: cxx $plugin_dir/$plugin.cpp - """ -) +NINJA_TEMPLATE = Template(( + "rule link\n" + " command = g++ -shared -o $$out $$in -L$torch_dir/lib -L$cuda_dir/lib64 -L$trt_lib_dir -lc10 -lc10_cuda -ltorch -lcudart -lcaffe2 -lcaffe2_gpu -lprotobuf -lprotobuf-lite -pthread -lpthread -lnvinfer\n" + "rule protoc\n" + " command = protoc $$in --cpp_out=. --python_out=.\n" + "rule cxx\n" + " command = g++ -c -fPIC $$in -I$cuda_dir/include -I$torch_dir/include -I$torch_dir/include/torch/csrc/api/include -I. 
-std=c++11 -I$trt_inc_dir\n" +)) + +PLUGIN_TEMPLATE = Template(( + "build $plugin_dir/$plugin.pb.h $plugin_dir/$plugin.pb.cc $plugin_dir/${plugin}_pb2.py: protoc $plugin_dir/$plugin.proto\n" + "build $plugin.pb.o: cxx $plugin_dir/$plugin.pb.cc\n" + "build $plugin.o: cxx $plugin_dir/$plugin.cpp\n" +)) def build(cuda_dir="/usr/local/cuda", @@ -55,11 +48,9 @@ def build(cuda_dir="/usr/local/cuda", }) plugin_o_files += [plugin + '.pb.o', plugin + '.o'] - NINJA_STR += Template( - """ - build torch2trt/libtorch2trt.so: link $o_files - """ - ).substitute({'o_files': ' '.join(plugin_o_files)}) + NINJA_STR += Template(( + "build torch2trt/libtorch2trt.so: link $o_files\n" + )).substitute({'o_files': ' '.join(plugin_o_files)}) with open('build.ninja', 'w') as f: f.write(NINJA_STR) diff --git a/setup.py b/setup.py index b0d71936..a2b8dded 100644 --- a/setup.py +++ b/setup.py @@ -3,54 +3,81 @@ import shutil from setuptools import setup, find_packages from setuptools.command.install import install +from setuptools.command.develop import develop from distutils.cmd import Command from build import build package_data = {} +plugins_user_options = [ + ('plugins', None, 'Build plugins'), + ('cuda-dir=', None, 'Location of CUDA (if not default location)'), + ('torch-dir=', None, 'Location of PyTorch (if not default location)'), + ('trt-inc-dir=', None, 'Location of TensorRT include files (if not default location)'), + ('trt-lib-dir=', None, 'Location of TensorRT libraries (if not default location)'), +] + + +def initialize_plugins_options(cmd_obj): + cmd_obj.plugins = False + cmd_obj.cuda_dir = None + cmd_obj.torch_dir = None + cmd_obj.trt_inc_dir = None + cmd_obj.trt_lib_dir = None + + +def run_plugins_compilation(cmd_obj): + if cmd_obj.plugins: + build_args = {} + if cmd_obj.cuda_dir: + build_args['cuda_dir'] = cmd_obj.cuda_dir + if cmd_obj.torch_dir: + build_args['torch_dir'] = cmd_obj.torch_dir + if cmd_obj.trt_inc_dir: + build_args['trt_inc_dir'] = cmd_obj.trt_inc_dir + if cmd_obj.trt_lib_dir: + build_args['trt_lib_dir'] = cmd_obj.trt_lib_dir + + print('Building in plugin support') + build(**build_args) + package_data['torch2trt'] = ['libtorch2trt.so'] + + +class DevelopCommand(develop): + description = "Builds the package and symlinks it into the PYTHONPATH" + user_options = develop.user_options + plugins_user_options + + def initialize_options(self): + develop.initialize_options(self) + initialize_plugins_options(self) + + def finalize_options(self): + develop.finalize_options(self) + + def run(self): + run_plugins_compilation(self) + develop.run(self) + class InstallCommand(install): description = "Builds the package" - user_options = install.user_options + [ - ('plugins', None, 'Build plugins'), - ('cuda-dir=', None, 'Location of CUDA (if not default location)'), - ('torch-dir=', None, 'Location of PyTorch (if not default location)'), - ('trt-inc-dir=', None, 'Location of TensorRT include files (if not default location)'), - ('trt-lib-dir=', None, 'Location of TensorRT libraries (if not default location)'), - ] + user_options = install.user_options + plugins_user_options def initialize_options(self): install.initialize_options(self) - self.plugins = False - self.cuda_dir = None - self.torch_dir = None - self.trt_inc_dir = None - self.trt_lib_dir = None + initialize_plugins_options(self) def finalize_options(self): install.finalize_options(self) def run(self): - if self.plugins: - build_args = {} - if self.cuda_dir: - build_args['cuda_dir'] = self.cuda_dir - if self.torch_dir: - build_args['torch_dir'] 
= self.torch_dir - if self.trt_inc_dir: - build_args['trt_inc_dir'] = self.trt_inc_dir - if self.trt_lib_dir: - build_args['trt_lib_dir'] = self.trt_lib_dir - - print('Building in plugin support') - build(**build_args) - package_data['torch2trt'] = ['libtorch2trt.so'] + run_plugins_compilation(self) install.run(self) class CleanCommand(Command): """Custom clean command to tidy up the project root.""" - PY_CLEAN_FILES = './build ./dist ./__pycache__ ./*.pyc ./*.tgz ./*.egg-info'.split(' ') + PY_CLEAN_FILES = ['./build', './dist', './__pycache__', './*.pyc', './*.tgz', './*.egg-info'] description = "Command to tidy up the project root" user_options = [] @@ -69,18 +96,20 @@ def run(self): if not path.startswith(root_dir): # Die if path in CLEAN_FILES is absolute + outside this directory raise ValueError("%s is not a path inside %s" % (path, root_dir)) - print('removing %s' % os.path.relpath(path)) + print('Removing %s' % os.path.relpath(path)) shutil.rmtree(path) cmd_list = { "Removing generated protobuf cc files": "find . -name '*.pb.cc' -print0 | xargs -0 rm -f;", "Removing generated protobuf h files": "find . -name '*.pb.h' -print0 | xargs -0 rm -f;", "Removing generated protobuf py files": "find . -name '*_pb2.py' -print0 | xargs -0 rm -f;", - "Removing generated ninja files": "find . -name '*.ninja*' -print0 | xargs -0 rm -f;" + "Removing generated ninja files": "find . -name '*.ninja*' -print0 | xargs -0 rm -f;", + "Removing generated o files": "find . -name '*.o' -print0 | xargs -0 rm -f;", + "Removing generated so files": "find . -name '*.so' -print0 | xargs -0 rm -f;", } for cmd, script in cmd_list.items(): - print("Running {}".format(cmd)) + print("{}".format(cmd)) os.system(script) @@ -91,6 +120,7 @@ def run(self): cmdclass={ 'install': InstallCommand, 'clean': CleanCommand, + 'develop': DevelopCommand, }, packages=find_packages(), package_data=package_data From c06c16f923f8ca828bf64ff1036efd0e4906f82e Mon Sep 17 00:00:00 2001 From: GeoffreyChen Date: Wed, 24 Jul 2019 18:56:30 +0800 Subject: [PATCH 144/355] add mul --- torch2trt/converters/__init__.py | 1 + torch2trt/converters/mul.py | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 torch2trt/converters/mul.py diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index 55144e00..1d0d2c3d 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -1,6 +1,7 @@ from .adaptive_avg_pool2d import * from .AdaptiveAvgPool2d import * from .add import * +from .mul import * from .iadd import * from .AvgPool2d import * from .BatchNorm2d import * diff --git a/torch2trt/converters/mul.py b/torch2trt/converters/mul.py new file mode 100644 index 00000000..ed84f208 --- /dev/null +++ b/torch2trt/converters/mul.py @@ -0,0 +1,23 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +@tensorrt_converter('torch.Tensor.__mul__') +def convert_mul(ctx): + input_a = ctx.method_args[0] + input_b = ctx.method_args[1] + output = ctx.method_return + layer = ctx.network.add_elementwise(input_a._trt, input_b._trt, trt.ElementWiseOperation.PROD) + output._trt = layer.get_output(0) + + +class Mul(torch.nn.Module): + def __init__(self): + super(Mul, self).__init__() + + def forward(self, x, y): + return x * y + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) +def test_mul_basic(): + return Mul() \ No newline at end of file From 65b9e6502540177bf009d363978de403bc6e9e39 Mon Sep 17 00:00:00 2001 
From: GeoffreyChen Date: Wed, 24 Jul 2019 19:30:37 +0800 Subject: [PATCH 145/355] add div --- torch2trt/converters/__init__.py | 1 + torch2trt/converters/div.py | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 torch2trt/converters/div.py diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index 1d0d2c3d..c3a2a7af 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -2,6 +2,7 @@ from .AdaptiveAvgPool2d import * from .add import * from .mul import * +from .div import * from .iadd import * from .AvgPool2d import * from .BatchNorm2d import * diff --git a/torch2trt/converters/div.py b/torch2trt/converters/div.py new file mode 100644 index 00000000..a40f09a0 --- /dev/null +++ b/torch2trt/converters/div.py @@ -0,0 +1,23 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +@tensorrt_converter('torch.Tensor.__truediv__') +def convert_div(ctx): + input_a = ctx.method_args[0] + input_b = ctx.method_args[1] + output = ctx.method_return + layer = ctx.network.add_elementwise(input_a._trt, input_b._trt, trt.ElementWiseOperation.DIV) + output._trt = layer.get_output(0) + + +class Div(torch.nn.Module): + def __init__(self): + super(Div, self).__init__() + + def forward(self, x, y): + return x / y + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) +def test_div_basic(): + return Div() \ No newline at end of file From ad7bcdcf1036199d10593b471250a069a7645637 Mon Sep 17 00:00:00 2001 From: John Date: Thu, 25 Jul 2019 13:31:01 -0700 Subject: [PATCH 146/355] added support for strict_type_constraints (#26) --- torch2trt/torch2trt.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index 9b19e25d..9f59a67a 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -235,7 +235,7 @@ def forward(self, *inputs): def torch2trt(module, inputs, input_names=None, output_names=None, log_level=trt.Logger.ERROR, max_batch_size=1, - fp16_mode=False, max_workspace_size=0): + fp16_mode=False, max_workspace_size=0, strict_type_constraints=False): # copy inputs to avoid modifications to source data inputs = [tensor.clone() for tensor in inputs] @@ -260,6 +260,7 @@ def torch2trt(module, inputs, input_names=None, output_names=None, log_level=trt builder.max_workspace_size = max_workspace_size builder.fp16_mode = fp16_mode builder.max_batch_size = max_batch_size + builder.strict_type_constraints = strict_type_constraints engine = builder.build_cuda_engine(network) From 2b499f95a2ab52fe4ed8951486a0572fbbbbec72 Mon Sep 17 00:00:00 2001 From: vfdev Date: Fri, 26 Jul 2019 20:35:01 +0200 Subject: [PATCH 147/355] Fix typo (#27) --- notebooks/image_segmentation/conversion.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/image_segmentation/conversion.ipynb b/notebooks/image_segmentation/conversion.ipynb index 077c53ee..b540b34c 100644 --- a/notebooks/image_segmentation/conversion.ipynb +++ b/notebooks/image_segmentation/conversion.ipynb @@ -40,7 +40,7 @@ " super(ModelWrapper, self).__init__()\n", " self.model = model\n", " def forward(self, x):\n", - " return model(x)['out']" + " return self.model(x)['out']" ] }, { From 5b28a58253503ae743248141a8f3833fe3fe501b Mon Sep 17 00:00:00 2001 From: John Date: Fri, 26 Jul 2019 16:08:58 -0700 Subject: [PATCH 148/355] added pad converter (#28) --- torch2trt/converters/__init__.py | 1 + torch2trt/converters/pad.py | 
32 ++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 torch2trt/converters/pad.py diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index c3a2a7af..0f077b3d 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -14,6 +14,7 @@ from .Linear import * from .LogSoftmax import * from .MaxPool2d import * +from .pad import * from .relu import * from .ReLU import * from .relu6 import * diff --git a/torch2trt/converters/pad.py b/torch2trt/converters/pad.py new file mode 100644 index 00000000..6d8ffeb8 --- /dev/null +++ b/torch2trt/converters/pad.py @@ -0,0 +1,32 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +@tensorrt_converter('torch.nn.functional.pad') +def convert_pad(ctx): + input = ctx.method_args[0] + output = ctx.method_return + + pad = ctx.method_args[1] + pre_padding = (pad[2], pad[0]) + post_padding = (pad[3], pad[1]) + + # mode / value are ignored since not supported by TensorRT + + layer = ctx.network.add_padding(input._trt, pre_padding, post_padding) + output._trt = layer.get_output(0) + + +class Pad(torch.nn.Module): + + def __init__(self, pad): + super(Pad, self).__init__() + self.pad = pad + + def forward(self, x): + return torch.nn.functional.pad(x, self.pad) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) +def test_pad_basic(): + return Pad((1, 2, 3, 4)) \ No newline at end of file From 747a21205fe5dfe509857f75ebfc234a343e97b5 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Fri, 26 Jul 2019 19:17:38 -0400 Subject: [PATCH 149/355] added sigmoid --- torch2trt/converters/__init__.py | 1 + torch2trt/converters/sigmoid.py | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 torch2trt/converters/sigmoid.py diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index 0f077b3d..cb5a5948 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -19,6 +19,7 @@ from .ReLU import * from .relu6 import * from .ReLU6 import * +from .sigmoid import * from .view import * from .transpose import * from .mean import * diff --git a/torch2trt/converters/sigmoid.py b/torch2trt/converters/sigmoid.py new file mode 100644 index 00000000..ed75d5e9 --- /dev/null +++ b/torch2trt/converters/sigmoid.py @@ -0,0 +1,17 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +@tensorrt_converter('torch.nn.functional.sigmoid') +@tensorrt_converter('torch.sigmoid') +def convert_sigmoid(ctx): + input = ctx.method_args[0] + output = ctx.method_return + + layer = ctx.network.add_activation(input._trt, trt.ActivationType.SIGMOID) + output._trt = layer.get_output(0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +def test_sigmoid_basic(): + return torch.nn.Sigmoid() \ No newline at end of file From cacee7baba6ce2e1b03982d53b8de030bcf83cb5 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Fri, 26 Jul 2019 19:23:08 -0400 Subject: [PATCH 150/355] added tanh --- torch2trt/converters/__init__.py | 1 + torch2trt/converters/tanh.py | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 torch2trt/converters/tanh.py diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index cb5a5948..ba58c21c 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -21,6 +21,7 @@ from .ReLU6 import * from .sigmoid import * from .view import * +from .tanh 
import *
 from .transpose import *
 from .mean import *
 from .softmax import *
diff --git a/torch2trt/converters/tanh.py b/torch2trt/converters/tanh.py
new file mode 100644
index 00000000..5eb5eef3
--- /dev/null
+++ b/torch2trt/converters/tanh.py
@@ -0,0 +1,17 @@
+from torch2trt.torch2trt import *
+from torch2trt.module_test import add_module_test
+
+
+@tensorrt_converter('torch.nn.functional.tanh')
+@tensorrt_converter('torch.tanh')
+def convert_tanh(ctx):
+    input = ctx.method_args[0]
+    output = ctx.method_return
+
+    layer = ctx.network.add_activation(input._trt, trt.ActivationType.TANH)
+    output._trt = layer.get_output(0)
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
+def test_tanh_basic():
+    return torch.nn.Tanh()
\ No newline at end of file
From fb4acad80d6d34631792884a40f35190efa0aaf8 Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Sun, 28 Jul 2019 20:55:41 -0400
Subject: [PATCH 151/355] added support for torch.flatten

---
 torch2trt/converters/view.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/torch2trt/converters/view.py b/torch2trt/converters/view.py
index 6920c39f..d5255239 100644
--- a/torch2trt/converters/view.py
+++ b/torch2trt/converters/view.py
@@ -2,6 +2,7 @@
 from torch2trt.module_test import add_module_test
 
 
+@tensorrt_converter('torch.flatten')
 @tensorrt_converter('torch.Tensor.reshape')
 @tensorrt_converter('torch.Tensor.view')
 def convert_view(ctx):
From e281577a8f6ab7022b75f482980bc1056964cd2d Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Sun, 28 Jul 2019 21:36:18 -0400
Subject: [PATCH 153/355] added Conv1d

---
 torch2trt/converters/Conv1d.py | 63 ++++++++++++++++++++++++++++++++
 torch2trt/converters/__init__.py | 1 +
 2 files changed, 64 insertions(+)
 create mode 100644 torch2trt/converters/Conv1d.py

diff --git a/torch2trt/converters/Conv1d.py b/torch2trt/converters/Conv1d.py
new file mode 100644
index 00000000..c65502e0
--- /dev/null
+++ b/torch2trt/converters/Conv1d.py
@@ -0,0 +1,63 @@
+from torch2trt.torch2trt import *
+from torch2trt.module_test import add_module_test
+
+
+@tensorrt_converter('torch.nn.Conv1d.forward')
+def convert_Conv1d(ctx):
+    module = ctx.method_args[0]
+    input = ctx.method_args[1]
+    output = ctx.method_return
+
+    kernel_size = (module.kernel_size[0], 1)
+    stride = (module.stride[0], 1)
+    padding = (module.padding[0], 0)
+    dilation = (module.dilation[0], 1)
+
+    kernel = module.weight.detach().cpu().numpy()[..., None]
+
+    bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype))
+    if module.bias is not None:
+        bias = module.bias.detach().cpu().numpy()
+
+    # reshape to 2D
+    layer = ctx.network.add_shuffle(input._trt)
+    layer.reshape_dims = (-1, input.shape[-1], 1)
+
+    layer = ctx.network.add_convolution(
+        input=layer.get_output(0),
+        num_output_maps=module.out_channels,
+        kernel_shape=kernel_size,
+        kernel=kernel,
+        bias=bias)
+    layer.stride = stride
+    layer.padding = padding
+
layer.dilation = dilation + + if module.groups is not None: + layer.num_groups = module.groups + + # reshape back to 1D + layer = ctx.network.add_shuffle(layer.get_output(0)) + layer.reshape_dims = (-1, output.shape[-1]) + + output._trt = layer.get_output(0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224)]) +def test_Conv1d_basic(): + return torch.nn.Conv1d(10, 5, kernel_size=1, stride=1, padding=0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224)]) +def test_Conv1d_stride2(): + return torch.nn.Conv1d(10, 5, kernel_size=1, stride=2, padding=0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224)]) +def test_Conv1d_kernel3(): + return torch.nn.Conv1d(10, 5, kernel_size=3, stride=2, padding=1) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224)]) +def test_Conv1d_dilation2(): + return torch.nn.Conv1d(10, 5, kernel_size=3, stride=1, padding=1, dilation=2) diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index ba58c21c..de0a6f6c 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -7,6 +7,7 @@ from .AvgPool2d import * from .BatchNorm2d import * from .cat import * +from .Conv1d import * from .Conv2d import * from .ConvTranspose2d import * from .identity import * From ba5480407f8cc3fe4b83d08edd5a3a0d4037830e Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sun, 28 Jul 2019 21:51:25 -0400 Subject: [PATCH 154/355] added BatchNorm1d --- torch2trt/converters/BatchNorm1d.py | 31 +++++++++++++++++++++++++++++ torch2trt/converters/__init__.py | 1 + 2 files changed, 32 insertions(+) create mode 100644 torch2trt/converters/BatchNorm1d.py diff --git a/torch2trt/converters/BatchNorm1d.py b/torch2trt/converters/BatchNorm1d.py new file mode 100644 index 00000000..383f3ead --- /dev/null +++ b/torch2trt/converters/BatchNorm1d.py @@ -0,0 +1,31 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +@tensorrt_converter('torch.nn.BatchNorm1d.forward') +def convert_BatchNorm2d(ctx): + module = ctx.method_args[0] + input = ctx.method_args[1] + output = ctx.method_return + + scale = module.weight.detach().cpu().numpy() / np.sqrt(module.running_var.detach().cpu().numpy() + module.eps) + bias = module.bias.detach().cpu().numpy() - module.running_mean.detach().cpu().numpy() * scale + power = np.ones_like(scale) + + # reshape to 2D + layer = ctx.network.add_shuffle(input._trt) + layer.reshape_dims = (-1, input.shape[-1], 1) + + layer = ctx.network.add_scale(layer.get_output(0), trt.ScaleMode.CHANNEL, bias, scale, power) + + # reshape back to 2D + layer = ctx.network.add_shuffle(layer.get_output(0)) + layer.reshape_dims = (-1, output.shape[-1]) + + output._trt = layer.get_output(0) + + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3)]) +def test_BatchNorm1d_basic(): + return torch.nn.BatchNorm1d(10) \ No newline at end of file diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index de0a6f6c..7dbc8b43 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -5,6 +5,7 @@ from .div import * from .iadd import * from .AvgPool2d import * +from .BatchNorm1d import * from .BatchNorm2d import * from .cat import * from .Conv1d import * From 3f1444cb753075faac66e127fea016ebb755a243 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 1 Aug 2019 16:00:53 -0400 Subject: [PATCH 155/355] added plugin registry --- torch2trt/__init__.py | 6 ++++++ 1 file changed, 6 
insertions(+) diff --git a/torch2trt/__init__.py b/torch2trt/__init__.py index 00efdda6..625a19ae 100644 --- a/torch2trt/__init__.py +++ b/torch2trt/__init__.py @@ -1,11 +1,17 @@ from .torch2trt import * from .converters import * +import tensorrt as trt def load_plugins(): import os import ctypes ctypes.CDLL(os.path.join(os.path.dirname(__file__), 'libtorch2trt.so')) + + registry = trt.get_plugin_registry() + torch2trt_creators = [c for c in registry.plugin_creator_list if c.plugin_namespace == 'torch2trt'] + for c in torch2trt_creators: + registry.register_creator(c, 'torch2trt') try: From c3889fb37553a474672d6c736dd9b47048c521b4 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Fri, 2 Aug 2019 17:36:50 -0400 Subject: [PATCH 156/355] fixed failure to run deserialized plugin by adding params to protobuf --- .../converters/interpolate/interpolate.cpp | 34 ++++++++++++++++--- .../converters/interpolate/interpolate.proto | 13 +++++++ 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/torch2trt/converters/interpolate/interpolate.cpp b/torch2trt/converters/interpolate/interpolate.cpp index 6f68846d..c35b4fd5 100644 --- a/torch2trt/converters/interpolate/interpolate.cpp +++ b/torch2trt/converters/interpolate/interpolate.cpp @@ -59,23 +59,47 @@ class interpolate_Plugin : public IPluginV2 { void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) override { - tensor_options = tensor_options.device(c10::kCUDA); + + // set data type if (type == DataType::kFLOAT) { - tensor_options = tensor_options.dtype(c10::kFloat); + message.set_dtype(DataTypeMessage::kFloat); } else if (type == DataType::kHALF) { tensor_options = tensor_options.dtype(c10::kHalf); + message.set_dtype(DataTypeMessage::kHalf); } - + + // set input sizes for (int i = 0; i < inputDims[0].nbDims; i++) { - input_sizes.push_back(inputDims[0].d[i]); + message.add_input_size(inputDims[0].d[i]); } + // set output sizes for (int i = 0; i < outputDims[0].nbDims; i++) { - output_sizes.push_back(outputDims[0].d[i]); + message.add_output_size(outputDims[0].d[i]); } } int initialize() override { + // set device + tensor_options = tensor_options.device(c10::kCUDA); + + // set data type + if (message.dtype() == DataTypeMessage::kFloat) { + tensor_options = tensor_options.dtype(c10::kFloat); + } else if (message.dtype() == DataTypeMessage::kHalf) { + tensor_options = tensor_options.dtype(c10::kHalf); + } + + input_sizes.resize(message.input_size_size()); + output_sizes.resize(message.output_size_size()); + + for (int i = 0; i < message.input_size_size(); i++) { + input_sizes[i] = message.input_size(i); + } + for (int i = 0; i < message.output_size_size(); i++) { + output_sizes[i] = message.output_size(i); + } + return 0; } diff --git a/torch2trt/converters/interpolate/interpolate.proto b/torch2trt/converters/interpolate/interpolate.proto index b91db045..34cc8585 100644 --- a/torch2trt/converters/interpolate/interpolate.proto +++ b/torch2trt/converters/interpolate/interpolate.proto @@ -3,8 +3,21 @@ syntax = "proto3"; package torch2trt; +enum DataTypeMessage { + kFloat = 0; + kHalf = 1; + kInt8 = 2; + kInt32 = 3; +} + + message interpolate_Message { repeated int64 size = 1; string mode = 2; bool align_corners = 3; + + // below params are configured by TRT and not set by user + DataTypeMessage dtype = 4; + repeated int64 input_size = 5; + repeated int64 output_size = 6; } From eeb9253a44152209871aa79f7fdcc9208aabc36f Mon Sep 17 00:00:00 2001 
From: John Date: Tue, 6 Aug 2019 11:24:23 -0700 Subject: [PATCH 157/355] Update README.md --- README.md | 55 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 7c0a14bd..0bededb6 100644 --- a/README.md +++ b/README.md @@ -12,33 +12,6 @@ If you find an issue, please [let us know](../..//issues)! > Please note, this converter has limited coverage of TensorRT / PyTorch. We created it primarily > to easily optimize the models used in the [JetBot](https://github.com/NVIDIA-AI-IOT/jetbot) project. If you find the converter helpful with other models, please [let us know](../..//issues). -## Setup - -### Option 1 - Without plugins - -To install without compiling plugins, call the following - -```bash -git clone https://github.com/NVIDIA-AI-IOT/torch2trt -cd torch2trt -sudo python setup.py install -``` - -### Option 2 - With plugins (experimental) - -To install with plugins to support some operations in PyTorch that are not natviely supported with TensorRT, call the following - -> This currently only includes a plugin for ``torch.nn.functional.interpolate`` - -```bash -sudo apt-get install libprotobuf* protobuf-compiler ninja-build -git clone https://github.com/NVIDIA-AI-IOT/torch2trt -cd torch2trt -sudo python setup.py install --plugins -``` - -> torch2trt is tested against a system configured with the [JetCard](http://github.com/NVIDIA-AI-IOT/jetcard) setup. Different system configurations may require additional steps. - ## Usage Below are some usage examples, for more check out the [notebooks](notebooks). @@ -123,6 +96,34 @@ We tested the converter against these models using the [test.sh](test.sh) script | vgg19_bn | | | 51.4 | 121 | +## Setup + +### Option 1 - Without plugins + +To install without compiling plugins, call the following + +```bash +git clone https://github.com/NVIDIA-AI-IOT/torch2trt +cd torch2trt +sudo python setup.py install +``` + +### Option 2 - With plugins (experimental) + +To install with plugins to support some operations in PyTorch that are not natviely supported with TensorRT, call the following + +> This currently only includes a plugin for ``torch.nn.functional.interpolate`` + +```bash +sudo apt-get install libprotobuf* protobuf-compiler ninja-build +git clone https://github.com/NVIDIA-AI-IOT/torch2trt +cd torch2trt +sudo python setup.py install --plugins +``` + +> torch2trt is tested against a system configured with the [JetCard](http://github.com/NVIDIA-AI-IOT/jetcard) setup. Different system configurations may require additional steps. + + ## How does it work? 
This converter works by attaching conversion functions (like ``convert_ReLU``) to the original From 7c3dd5a3e43d3a6109f506338a5c2ed7dfc73615 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Fri, 16 Aug 2019 01:05:10 -0400 Subject: [PATCH 158/355] added support for NC input to batchnorm1d --- torch2trt/converters/BatchNorm1d.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/torch2trt/converters/BatchNorm1d.py b/torch2trt/converters/BatchNorm1d.py index 383f3ead..51a7bf30 100644 --- a/torch2trt/converters/BatchNorm1d.py +++ b/torch2trt/converters/BatchNorm1d.py @@ -14,18 +14,22 @@ def convert_BatchNorm2d(ctx): # reshape to 2D layer = ctx.network.add_shuffle(input._trt) - layer.reshape_dims = (-1, input.shape[-1], 1) + + if len(input.shape) == 2: + layer.reshape_dims = (input.shape[1], 1, 1) + else: + layer.reshape_dims = (input.shape[1], input.shape[2], 1) layer = ctx.network.add_scale(layer.get_output(0), trt.ScaleMode.CHANNEL, bias, scale, power) - # reshape back to 2D + # reshape back to 1D layer = ctx.network.add_shuffle(layer.get_output(0)) - layer.reshape_dims = (-1, output.shape[-1]) + layer.reshape_dims = tuple(output.shape[1:]) output._trt = layer.get_output(0) - - + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10)]) @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3)]) def test_BatchNorm1d_basic(): return torch.nn.BatchNorm1d(10) \ No newline at end of file From ecd48935b7e5b8b39054b4e5fbe97cd1fc3cd166 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 22 Aug 2019 14:57:45 -0400 Subject: [PATCH 159/355] list hotfix --- torch2trt/torch2trt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index 9f59a67a..31f7e0eb 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -251,7 +251,7 @@ def torch2trt(module, inputs, input_names=None, output_names=None, log_level=trt outputs = module(*inputs) - if not isinstance(outputs, tuple): + if not isinstance(outputs, tuple) and not isinstance(outputs, list): outputs = (outputs, ) ctx.mark_outputs(outputs, output_names) From 445081f7a2890b743ca874f2c135b13e3520b8ea Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 10 Sep 2019 16:30:41 -0700 Subject: [PATCH 160/355] removed caffe2 dependency --- build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.py b/build.py index 68b103b1..5b5e3dfc 100644 --- a/build.py +++ b/build.py @@ -12,7 +12,7 @@ NINJA_STR = Template( """ rule link - command = g++ -shared -o $$out $$in -L$torch_dir/lib -L$cuda_dir/lib64 -lc10 -lc10_cuda -ltorch -lcudart -lcaffe2 -lcaffe2_gpu -lprotobuf -lprotobuf-lite -pthread -lpthread -lnvinfer + command = g++ -shared -o $$out $$in -L$torch_dir/lib -L$cuda_dir/lib64 -lc10 -lc10_cuda -ltorch -lcudart -lprotobuf -lprotobuf-lite -pthread -lpthread -lnvinfer rule protoc command = protoc $$in --cpp_out=. --python_out=. From e394a14225357c90c82ace24a859c25983b61ca2 Mon Sep 17 00:00:00 2001 From: Markus Thom <55278509+mt1871@users.noreply.github.com> Date: Fri, 13 Sep 2019 13:22:56 +0200 Subject: [PATCH 161/355] consider ceil_mode of torch.nn.MaxPool2d If ceil_mode is False, the default value of layer.padding_mode is PaddingMode.EXPLICIT_ROUND_DOWN. If ceil_mode is True, padding_mode should be trt.PaddingMode.EXPLICIT_ROUND_UP. 
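A minimal sketch of why the rounding mode matters, using PyTorch's documented pooling output-size formula for dilation=1 (the width, kernel, stride, and padding values below are illustrative only, not taken from this patch):

```python
import math

def pooled_size(h, kernel_size, stride, padding, ceil_mode=False):
    # PyTorch (dilation=1): floor((H + 2*padding - kernel_size) / stride) + 1,
    # or ceil(...) + 1 when ceil_mode=True
    r = (h + 2 * padding - kernel_size) / stride
    return (math.ceil(r) if ceil_mode else math.floor(r)) + 1

# width 6 with kernel_size=3, stride=2, padding=1: the two modes disagree
print(pooled_size(6, 3, 2, 1, ceil_mode=False))  # 3 -> TensorRT default EXPLICIT_ROUND_DOWN matches
print(pooled_size(6, 3, 2, 1, ceil_mode=True))   # 4 -> requires EXPLICIT_ROUND_UP
```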
--- torch2trt/converters/MaxPool2d.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/torch2trt/converters/MaxPool2d.py b/torch2trt/converters/MaxPool2d.py index 4b6e42da..f20c128d 100644 --- a/torch2trt/converters/MaxPool2d.py +++ b/torch2trt/converters/MaxPool2d.py @@ -23,5 +23,7 @@ def convert_MaxPool2d(ctx): input=input._trt, type=trt.PoolingType.MAX, window_size=kernel_size) layer.stride = stride layer.padding = padding + if module.ceil_mode: + layer.padding_mode = trt.PaddingMode.EXPLICIT_ROUND_UP output._trt = layer.get_output(0) \ No newline at end of file From 59e0ec8563d997fda75cfcc355f90d4ba688977a Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 17 Sep 2019 13:03:01 -0700 Subject: [PATCH 162/355] registered add for more interfaces --- torch2trt/converters/__init__.py | 1 - torch2trt/converters/add.py | 29 +++++++++++++++++++++++++++++ torch2trt/converters/iadd.py | 24 ------------------------ 3 files changed, 29 insertions(+), 25 deletions(-) delete mode 100644 torch2trt/converters/iadd.py diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index 7dbc8b43..a510ee72 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -3,7 +3,6 @@ from .add import * from .mul import * from .div import * -from .iadd import * from .AvgPool2d import * from .BatchNorm1d import * from .BatchNorm2d import * diff --git a/torch2trt/converters/add.py b/torch2trt/converters/add.py index 7483766b..3a51aa25 100644 --- a/torch2trt/converters/add.py +++ b/torch2trt/converters/add.py @@ -2,6 +2,8 @@ from torch2trt.module_test import add_module_test +@tensorrt_converter('torch.add') +@tensorrt_converter('torch.Tensor.__iadd__') @tensorrt_converter('torch.Tensor.__add__') def convert_add(ctx): input_a = ctx.method_args[0] @@ -21,3 +23,30 @@ def forward(self, x, y): @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) def test_add_basic(): return Add() + + +class IAdd(torch.nn.Module): + def __init__(self): + super(IAdd, self).__init__() + + def forward(self, x, y): + x += y + return x + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) +def test_add_iadd(): + return IAdd() + + +class TorchAdd(torch.nn.Module): + def __init__(self): + super(TorchAdd, self).__init__() + + def forward(self, x, y): + return torch.add(x, y) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) +def test_add_torchadd(): + return TorchAdd() \ No newline at end of file diff --git a/torch2trt/converters/iadd.py b/torch2trt/converters/iadd.py deleted file mode 100644 index 7c513b6f..00000000 --- a/torch2trt/converters/iadd.py +++ /dev/null @@ -1,24 +0,0 @@ -from torch2trt.torch2trt import * -from torch2trt.module_test import add_module_test - - -@tensorrt_converter('torch.Tensor.__iadd__') -def convert_iadd(ctx): - input_a = ctx.method_args[0] - input_b = ctx.method_args[1] - layer = ctx.network.add_elementwise(input_a._trt, input_b._trt, trt.ElementWiseOperation.SUM) - ctx.method_args[0]._trt = layer.get_output(0) - - -class IAdd(torch.nn.Module): - def __init__(self): - super(IAdd, self).__init__() - - def forward(self, x, y): - x += y - return x - - -@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) -def test_iadd_basic(): - return IAdd() From 5ad17bcfdaf5c526159fc782d23a125cbb0104ba Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 17 Sep 2019 13:18:50 -0700 Subject: [PATCH 163/355] added 
additional div interfaces --- torch2trt/converters/div.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/torch2trt/converters/div.py b/torch2trt/converters/div.py index a40f09a0..df3fd89c 100644 --- a/torch2trt/converters/div.py +++ b/torch2trt/converters/div.py @@ -2,6 +2,8 @@ from torch2trt.module_test import add_module_test +@tensorrt_converter('torch.div') +@tensorrt_converter('torch.Tensor.__itruediv__') @tensorrt_converter('torch.Tensor.__truediv__') def convert_div(ctx): input_a = ctx.method_args[0] @@ -18,6 +20,34 @@ def __init__(self): def forward(self, x, y): return x / y + @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) def test_div_basic(): - return Div() \ No newline at end of file + return Div() + + +class IDiv(torch.nn.Module): + def __init__(self): + super(IDiv, self).__init__() + + def forward(self, x, y): + x /= y + return x + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) +def test_div_idiv(): + return IDiv() + + +class TorchDiv(torch.nn.Module): + def __init__(self): + super(TorchDiv, self).__init__() + + def forward(self, x, y): + return torch.div(x, y) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) +def test_div_torchdiv(): + return TorchDiv() \ No newline at end of file From e951955966bad7d8e5ee7f8fc37c81ca4be1db54 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 17 Sep 2019 13:19:27 -0700 Subject: [PATCH 164/355] added additional mul interfaces --- torch2trt/converters/mul.py | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/torch2trt/converters/mul.py b/torch2trt/converters/mul.py index ed84f208..53ee3b71 100644 --- a/torch2trt/converters/mul.py +++ b/torch2trt/converters/mul.py @@ -2,6 +2,8 @@ from torch2trt.module_test import add_module_test +@tensorrt_converter('torch.mul') +@tensorrt_converter('torch.Tensor.__imul__') @tensorrt_converter('torch.Tensor.__mul__') def convert_mul(ctx): input_a = ctx.method_args[0] @@ -20,4 +22,31 @@ def forward(self, x, y): @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) def test_mul_basic(): - return Mul() \ No newline at end of file + return Mul() + + +class IMul(torch.nn.Module): + def __init__(self): + super(IMul, self).__init__() + + def forward(self, x, y): + x *= y + return x + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) +def test_mul_imul(): + return IMul() + + +class TorchMul(torch.nn.Module): + def __init__(self): + super(TorchMul, self).__init__() + + def forward(self, x, y): + return torch.mul(x, y) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) +def test_mul_torchmul(): + return TorchMul() \ No newline at end of file From 5c41345111885eb2c0e74b25c651245010496c2b Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 17 Sep 2019 15:08:31 -0700 Subject: [PATCH 165/355] added clamp, clamp_min, clamp_max --- torch2trt/converters/__init__.py | 1 + torch2trt/converters/clamp.py | 78 ++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 torch2trt/converters/clamp.py diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index a510ee72..682380d7 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -7,6 +7,7 @@ from .BatchNorm1d import * from 
.BatchNorm2d import * from .cat import * +from .clamp import * from .Conv1d import * from .Conv2d import * from .ConvTranspose2d import * diff --git a/torch2trt/converters/clamp.py b/torch2trt/converters/clamp.py new file mode 100644 index 00000000..122f0ee2 --- /dev/null +++ b/torch2trt/converters/clamp.py @@ -0,0 +1,78 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +def __add_clamp(network, trt_input, val, op): + + # create TensorRT constant for minimum value + val_shape = (1, ) * len(trt_input.shape) # broadcast all dimensions + val_tensor = val * torch.ones(val_shape, dtype=torch_dtype_from_trt(trt_input.dtype)).cpu().numpy() + val_trt = network.add_constant(val_shape, val_tensor) + layer = network.add_elementwise(trt_input, val_trt.get_output(0), op) + + return layer + + +@tensorrt_converter('torch.clamp_min') +def convert_clamp_min(ctx): + input = ctx.method_args[0] + val = ctx.method_args[1] + output = ctx.method_return + + layer = __add_clamp(ctx.network, input._trt, val, trt.ElementWiseOperation.MAX) + + output._trt = layer.get_output(0) + + +class ClampMin(torch.nn.Module): + def forward(self, x): + return torch.clamp_min(x, -0.1) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) +def test_clamp_min(): + return ClampMin() + + +@tensorrt_converter('torch.clamp_max') +def convert_clamp_max(ctx): + input = ctx.method_args[0] + val = ctx.method_args[1] + output = ctx.method_return + + layer = __add_clamp(ctx.network, input._trt, val, trt.ElementWiseOperation.MIN) + + output._trt = layer.get_output(0) + + +class ClampMax(torch.nn.Module): + def forward(self, x): + return torch.clamp_max(x, 0.1) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) +def test_clamp_max(): + return ClampMax() + + +@tensorrt_converter('torch.clamp') +def convert_clamp(ctx): + input = ctx.method_args[0] + min_val = ctx.method_args[1] + max_val = ctx.method_args[2] + output = ctx.method_return + + layer = __add_clamp(ctx.network, input._trt, min_val, trt.ElementWiseOperation.MAX) + layer = __add_clamp(ctx.network, layer.get_output(0), max_val, trt.ElementWiseOperation.MIN) + + output._trt = layer.get_output(0) + + +class Clamp(torch.nn.Module): + def forward(self, x): + return torch.clamp(x, -0.1, 0.1) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) +def test_clamp_max(): + return Clamp() \ No newline at end of file From 08fecd30b1aae8ca06cc3984938327ca378a5dbd Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 17 Sep 2019 15:14:16 -0700 Subject: [PATCH 166/355] added clamp tensor --- torch2trt/converters/clamp.py | 66 ++++++++++++++++++++++++++++------- 1 file changed, 54 insertions(+), 12 deletions(-) diff --git a/torch2trt/converters/clamp.py b/torch2trt/converters/clamp.py index 122f0ee2..50e1d3a0 100644 --- a/torch2trt/converters/clamp.py +++ b/torch2trt/converters/clamp.py @@ -13,7 +13,11 @@ def __add_clamp(network, trt_input, val, op): return layer +# CLAMP_MIN + + @tensorrt_converter('torch.clamp_min') +@tensorrt_converter('torch.Tensor.clamp_min') def convert_clamp_min(ctx): input = ctx.method_args[0] val = ctx.method_args[1] @@ -24,17 +28,31 @@ def convert_clamp_min(ctx): output._trt = layer.get_output(0) -class ClampMin(torch.nn.Module): +class TorchClampMin(torch.nn.Module): def forward(self, x): return torch.clamp_min(x, -0.1) @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) -def test_clamp_min(): - return ClampMin() +def 
test_torch_clamp_min(): + return TorchClampMin() + + +class TensorClampMin(torch.nn.Module): + def forward(self, x): + return x.clamp_min(-0.1) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) +def test_tensor_clamp_min(): + return TensorClampMin() +# CLAMP_MAX + + @tensorrt_converter('torch.clamp_max') +@tensorrt_converter('torch.Tensor.clamp_max') def convert_clamp_max(ctx): input = ctx.method_args[0] val = ctx.method_args[1] @@ -45,17 +63,31 @@ def convert_clamp_max(ctx): output._trt = layer.get_output(0) -class ClampMax(torch.nn.Module): +class TorchClampMax(torch.nn.Module): def forward(self, x): return torch.clamp_max(x, 0.1) - + @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) -def test_clamp_max(): - return ClampMax() +def test_torch_clamp_max(): + return TorchClampMax() + + +class TensorClampMax(torch.nn.Module): + def forward(self, x): + return x.clamp_max(0.1) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) +def test_tensor_clamp_max(): + return TensorClampMax() + + +# CLAMP @tensorrt_converter('torch.clamp') +@tensorrt_converter('torch.Tensor.clamp') def convert_clamp(ctx): input = ctx.method_args[0] min_val = ctx.method_args[1] @@ -68,11 +100,21 @@ def convert_clamp(ctx): output._trt = layer.get_output(0) -class Clamp(torch.nn.Module): +class TorchClamp(torch.nn.Module): def forward(self, x): return torch.clamp(x, -0.1, 0.1) - - + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) +def test_torch_clamp(): + return TorchClamp() + + +class TensorClamp(torch.nn.Module): + def forward(self, x): + return x.clamp(-0.1, 0.1) + + @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) -def test_clamp_max(): - return Clamp() \ No newline at end of file +def test_tensor_clamp(): + return TensorClamp() \ No newline at end of file From 2769c29931bced9275c4c616f774722a95a39896 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 17 Sep 2019 18:05:21 -0700 Subject: [PATCH 167/355] added normalize --- torch2trt/converters/__init__.py | 1 + torch2trt/converters/normalize.py | 93 +++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+) create mode 100644 torch2trt/converters/normalize.py diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index 682380d7..535915e8 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -16,6 +16,7 @@ from .Linear import * from .LogSoftmax import * from .MaxPool2d import * +from .normalize import * from .pad import * from .relu import * from .ReLU import * diff --git a/torch2trt/converters/normalize.py b/torch2trt/converters/normalize.py new file mode 100644 index 00000000..4b4e547f --- /dev/null +++ b/torch2trt/converters/normalize.py @@ -0,0 +1,93 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +def __get_arg(ctx, name, pos, default): + if name in ctx.method_kwargs: + return ctx.method_kwargs[name] + elif len(ctx.method_args) > pos: + return ctx.method_args[pos] + else: + return default + + +def __trt_add_scalar_constant_like(network, tensor, value): + shape = (1, ) * len(tensor.shape) # broadcast all dimensions + array = value * torch.ones(shape, dtype=torch_dtype_from_trt(tensor.dtype)).cpu().numpy() + return network.add_constant(shape, array).get_output(0) + + +def __torch_dim_to_trt_bitmask(dim): + if not isinstance(dim, tuple): + dim = (dim, ) + + # create axes bitmask for reduce layer + axes = 0 + for d in dim: + 
axes |= 1 << (d - 1) # -1 to remove batch dimension + + return axes + + +@tensorrt_converter('torch.nn.functional.normalize') +def convert_normalize(ctx): + input = ctx.method_args[0] + output = ctx.method_return + + # get power + p = __get_arg(ctx, name='p', pos=1, default=2) + dim = __get_arg(ctx, name='dim', pos=2, default=1) + eps = __get_arg(ctx, name='eps', pos=3, default=1e-12) + + eps_trt = __trt_add_scalar_constant_like(ctx.network, input._trt, eps) + p_trt = __trt_add_scalar_constant_like(ctx.network, input._trt, p) + p_inv_trt = __trt_add_scalar_constant_like(ctx.network, input._trt, 1.0 / p) + + # compute norm = sum(abs(x)**p, dim=dim)**(1./p) + norm = ctx.network.add_unary(input._trt, trt.UnaryOperation.ABS).get_output(0) + norm = ctx.network.add_elementwise(norm, p_trt, trt.ElementWiseOperation.POW).get_output(0) + norm = ctx.network.add_reduce(norm, trt.ReduceOperation.SUM, __torch_dim_to_trt_bitmask(dim), keep_dims=True).get_output(0) + norm = ctx.network.add_elementwise(norm, p_inv_trt, trt.ElementWiseOperation.POW).get_output(0) + + # clamp norm = max(norm, eps) + norm = ctx.network.add_elementwise(norm, eps_trt, trt.ElementWiseOperation.MAX).get_output(0) + + # divide input by norm + output._trt = ctx.network.add_elementwise(input._trt, norm, trt.ElementWiseOperation.DIV).get_output(0) + + +class Normalize(torch.nn.Module): + def __init__(self, *args, **kwargs): + super(Normalize, self).__init__() + self.args = args + self.kwargs = kwargs + + def forward(self, x): + return torch.nn.functional.normalize(x, *self.args, **self.kwargs) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +def test_normalize_basic(): + return Normalize() + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +def test_normalize_l1_basic(): + return Normalize(p=1.0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +def test_normalize_l1p5_basic(): + return Normalize(p=1.5) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +def test_normalize_l2_height(): + return Normalize(p=2.0, dim=2) \ No newline at end of file From 43d1b1a72ff75b7d8aed1eb29f4de6c3a3e62465 Mon Sep 17 00:00:00 2001 From: Markus Thom <55278509+mt1871@users.noreply.github.com> Date: Wed, 18 Sep 2019 12:18:52 +0200 Subject: [PATCH 168/355] added test cases for MaxPool2d --- torch2trt/converters/MaxPool2d.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/torch2trt/converters/MaxPool2d.py b/torch2trt/converters/MaxPool2d.py index f20c128d..466c356c 100644 --- a/torch2trt/converters/MaxPool2d.py +++ b/torch2trt/converters/MaxPool2d.py @@ -1,4 +1,5 @@ from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test @tensorrt_converter('torch.nn.MaxPool2d.forward') @@ -26,4 +27,16 @@ def convert_MaxPool2d(ctx): if module.ceil_mode: layer.padding_mode = trt.PaddingMode.EXPLICIT_ROUND_UP - output._trt = layer.get_output(0) \ No newline at end of file + output._trt = layer.get_output(0) + + +@add_module_test(torch.float32, 
torch.device('cuda'), [(1, 3, 4, 6)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 5, 7)]) +def test_MaxPool2d_without_ceil_mode(): + return torch.nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=False) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 6)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 5, 7)]) +def test_MaxPool2d_with_ceil_mode(): + return torch.nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True) From 630194cef6b0505adb8c572df26ef51c0f856b62 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Wed, 25 Sep 2019 13:20:12 -0700 Subject: [PATCH 169/355] added mobilenet_v2 to module tests --- test.sh | 1 + torch2trt/module_test.py | 1 + 2 files changed, 2 insertions(+) diff --git a/test.sh b/test.sh index 54b3764e..623004d7 100755 --- a/test.sh +++ b/test.sh @@ -27,3 +27,4 @@ python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg11_bn python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg13_bn python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg16_bn python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg19_bn +python3 -m torch2trt.test -o $OUTPUT_FILE --name mobilenet_v2 diff --git a/torch2trt/module_test.py b/torch2trt/module_test.py index 60ce921d..e9c1c48e 100644 --- a/torch2trt/module_test.py +++ b/torch2trt/module_test.py @@ -35,6 +35,7 @@ def module_name(self): ModuleTest(torchvision.models.vgg13_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), ModuleTest(torchvision.models.vgg16_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), ModuleTest(torchvision.models.vgg19_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.mobilenet_v2, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), ] From f29ff96c0b69c65a652058f928a29721c56192b2 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 26 Sep 2019 17:10:43 -0700 Subject: [PATCH 170/355] added parsed_args --- torch2trt/torch2trt.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index 31f7e0eb..115d41ba 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -2,6 +2,7 @@ import tensorrt as trt from copy import copy import numpy as np +import inspect # UTILITY FUNCTIONS @@ -72,6 +73,29 @@ def trt_num_outputs(engine): CONVERTERS = {} + +def parse_method_args(method, args, kwargs): + """Returns dictionary of arguments w. 
defaults given function, args, and kwargs""" + + argspec = inspect.getfullargspec(method) + + named_args = {} + + # fill from default + offset = len(argspec.args) - len(argspec.defaults) + for i in range(len(argspec.defaults)): + named_args[argspec.args[i + offset]] = argspec.defaults[i] + + # fill from args + for i, value in enumerate(args): + named_args[argspec.args[i]] = value + + # fill from kwargs + for key, value in kwargs.items(): + named_args[key] = value + + return named_args + def attach_converter(ctx, method, converter): """Gets a function that executes PyTorch method and TensorRT converter""" @@ -92,6 +116,9 @@ def wrapper(*args, **kwargs): ctx.method_args = args ctx.method_kwargs = kwargs ctx.method_return = outputs + + # parse args, kwargs to get dictionary with defaults filled in + ctx.parsed_args = parse_method_args(method, ctx.method_args, ctx.method_kwargs) #print('%s : %s' % (method.__qualname__, converter.__name__)) converter(ctx) From d409f138afd08f8d399de0cf88d855ac60039f0f Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 26 Sep 2019 17:41:53 -0700 Subject: [PATCH 171/355] added utilities to support normalize --- torch2trt/converters/normalize.py | 48 ++++++++----------------------- torch2trt/torch2trt.py | 27 +++++++++++++++++ 2 files changed, 39 insertions(+), 36 deletions(-) diff --git a/torch2trt/converters/normalize.py b/torch2trt/converters/normalize.py index 4b4e547f..f3836218 100644 --- a/torch2trt/converters/normalize.py +++ b/torch2trt/converters/normalize.py @@ -2,58 +2,34 @@ from torch2trt.module_test import add_module_test -def __get_arg(ctx, name, pos, default): - if name in ctx.method_kwargs: - return ctx.method_kwargs[name] - elif len(ctx.method_args) > pos: - return ctx.method_args[pos] - else: - return default - - -def __trt_add_scalar_constant_like(network, tensor, value): - shape = (1, ) * len(tensor.shape) # broadcast all dimensions - array = value * torch.ones(shape, dtype=torch_dtype_from_trt(tensor.dtype)).cpu().numpy() - return network.add_constant(shape, array).get_output(0) - - -def __torch_dim_to_trt_bitmask(dim): - if not isinstance(dim, tuple): - dim = (dim, ) - - # create axes bitmask for reduce layer - axes = 0 - for d in dim: - axes |= 1 << (d - 1) # -1 to remove batch dimension - - return axes - - @tensorrt_converter('torch.nn.functional.normalize') def convert_normalize(ctx): input = ctx.method_args[0] + input_trt = get_or_create_trt_tensor(ctx.network, input) output = ctx.method_return # get power - p = __get_arg(ctx, name='p', pos=1, default=2) - dim = __get_arg(ctx, name='dim', pos=2, default=1) - eps = __get_arg(ctx, name='eps', pos=3, default=1e-12) + p = ctx.parsed_args['p'] + dim = ctx.parsed_args['dim'] + eps = ctx.parsed_args['eps'] - eps_trt = __trt_add_scalar_constant_like(ctx.network, input._trt, eps) - p_trt = __trt_add_scalar_constant_like(ctx.network, input._trt, p) - p_inv_trt = __trt_add_scalar_constant_like(ctx.network, input._trt, 1.0 / p) + # add broadcastable scalar constants to network + scalar_shape = (1,) * len(input.shape) + eps_trt = get_or_create_trt_tensor(ctx.network, eps * torch.ones(scalar_shape)) + p_trt = get_or_create_trt_tensor(ctx.network, p * torch.ones(scalar_shape)) + p_inv_trt = get_or_create_trt_tensor(ctx.network, torch.ones(scalar_shape) / p) # compute norm = sum(abs(x)**p, dim=dim)**(1./p) - norm = ctx.network.add_unary(input._trt, trt.UnaryOperation.ABS).get_output(0) + norm = ctx.network.add_unary(input_trt, trt.UnaryOperation.ABS).get_output(0) norm = 
ctx.network.add_elementwise(norm, p_trt, trt.ElementWiseOperation.POW).get_output(0) - norm = ctx.network.add_reduce(norm, trt.ReduceOperation.SUM, __torch_dim_to_trt_bitmask(dim), keep_dims=True).get_output(0) + norm = ctx.network.add_reduce(norm, trt.ReduceOperation.SUM, torch_dim_to_trt_axes(dim), keep_dims=True).get_output(0) norm = ctx.network.add_elementwise(norm, p_inv_trt, trt.ElementWiseOperation.POW).get_output(0) # clamp norm = max(norm, eps) norm = ctx.network.add_elementwise(norm, eps_trt, trt.ElementWiseOperation.MAX).get_output(0) # divide input by norm - output._trt = ctx.network.add_elementwise(input._trt, norm, trt.ElementWiseOperation.DIV).get_output(0) + output._trt = ctx.network.add_elementwise(input_trt, norm, trt.ElementWiseOperation.DIV).get_output(0) class Normalize(torch.nn.Module): diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index 115d41ba..f8590366 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -68,6 +68,33 @@ def trt_num_outputs(engine): return count +def torch_dim_to_trt_axes(dim): + """Converts torch dim, or tuple of dims to a tensorrt axes bitmask""" + if not isinstance(dim, tuple): + dim = (dim, ) + + # create axes bitmask for reduce layer + axes = 0 + for d in dim: + axes |= 1 << (d - 1) # -1 to remove batch dimension + + return axes + + +def get_or_create_trt_tensor(network, tensor): + """Adds tensor as constant to network and sets _trt attribute. Returns _trt attribute""" + + # only add if not already done + if not hasattr(tensor, '_trt'): + shape = tuple(tensor.shape[1:]) # exclude batch dimension + array = tensor[0].detach().cpu().numpy() + + layer = network.add_constant(shape, array) + tensor._trt = layer.get_output(0) + + return tensor._trt + + # CONVERSION REGISTRY AND HOOKS From dc547b819cf0cf34125bce7d0e6a771098fd498a Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 26 Sep 2019 18:09:19 -0700 Subject: [PATCH 172/355] dtype to normalized constants --- torch2trt/converters/normalize.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/torch2trt/converters/normalize.py b/torch2trt/converters/normalize.py index f3836218..18e8277f 100644 --- a/torch2trt/converters/normalize.py +++ b/torch2trt/converters/normalize.py @@ -15,9 +15,9 @@ def convert_normalize(ctx): # add broadcastable scalar constants to network scalar_shape = (1,) * len(input.shape) - eps_trt = get_or_create_trt_tensor(ctx.network, eps * torch.ones(scalar_shape)) - p_trt = get_or_create_trt_tensor(ctx.network, p * torch.ones(scalar_shape)) - p_inv_trt = get_or_create_trt_tensor(ctx.network, torch.ones(scalar_shape) / p) + eps_trt = get_or_create_trt_tensor(ctx.network, eps * torch.ones(scalar_shape, dtype=input.dtype)) + p_trt = get_or_create_trt_tensor(ctx.network, p * torch.ones(scalar_shape, dtype=input.dtype)) + p_inv_trt = get_or_create_trt_tensor(ctx.network, torch.ones(scalar_shape, dtype=input.dtype) / p) # compute norm = sum(abs(x)**p, dim=dim)**(1./p) norm = ctx.network.add_unary(input_trt, trt.UnaryOperation.ABS).get_output(0) From c429ae78d2ce1c753e7873669fc6d3265c37b42d Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 26 Sep 2019 19:34:23 -0700 Subject: [PATCH 173/355] added avg_pool2d and removed AvgPool2d --- torch2trt/converters/AvgPool2d.py | 26 ------------------------- torch2trt/converters/__init__.py | 2 +- torch2trt/torch2trt.py | 32 +++++++------------------------ 3 files changed, 8 insertions(+), 52 deletions(-) delete mode 100644 torch2trt/converters/AvgPool2d.py diff --git 
a/torch2trt/converters/AvgPool2d.py b/torch2trt/converters/AvgPool2d.py deleted file mode 100644 index 529ccad0..00000000 --- a/torch2trt/converters/AvgPool2d.py +++ /dev/null @@ -1,26 +0,0 @@ -from torch2trt.torch2trt import * - - -@tensorrt_converter('torch.nn.AvgPool2d.forward') -def convert_AvgPool2d(ctx): - module = ctx.method_args[0] - input = ctx.method_args[1] - output = ctx.method_return - - kernel_size = module.kernel_size - if not isinstance(kernel_size, tuple): - kernel_size = (kernel_size, ) * 2 - stride = module.stride - if not isinstance(stride, tuple): - stride = (stride, ) * 2 - padding = module.padding - if not isinstance(padding, tuple): - padding = (padding, ) * 2 - - layer = ctx.network.add_pooling( - input=input._trt, type=trt.PoolingType.AVERAGE, window_size=kernel_size) - layer.stride = stride - layer.padding = padding - layer.average_count_excludes_padding = not module.count_include_pad - - output._trt = layer.get_output(0) \ No newline at end of file diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index 535915e8..c78bad07 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -1,9 +1,9 @@ from .adaptive_avg_pool2d import * from .AdaptiveAvgPool2d import * from .add import * +from .avg_pool2d import * from .mul import * from .div import * -from .AvgPool2d import * from .BatchNorm1d import * from .BatchNorm2d import * from .cat import * diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index f8590366..c0fc9edd 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -2,7 +2,6 @@ import tensorrt as trt from copy import copy import numpy as np -import inspect # UTILITY FUNCTIONS @@ -99,29 +98,15 @@ def get_or_create_trt_tensor(network, tensor): CONVERTERS = {} - - -def parse_method_args(method, args, kwargs): - """Returns dictionary of arguments w. 
defaults given function, args, and kwargs""" - - argspec = inspect.getfullargspec(method) - - named_args = {} - # fill from default - offset = len(argspec.args) - len(argspec.defaults) - for i in range(len(argspec.defaults)): - named_args[argspec.args[i + offset]] = argspec.defaults[i] - # fill from args - for i, value in enumerate(args): - named_args[argspec.args[i]] = value - - # fill from kwargs - for key, value in kwargs.items(): - named_args[key] = value - - return named_args +def get_arg(ctx, name, pos, default): + if name in ctx.method_kwargs: + return ctx.method_kwargs[name] + elif len(ctx.method_args) > pos: + return ctx.method_args[pos] + else: + return default def attach_converter(ctx, method, converter): @@ -143,9 +128,6 @@ def wrapper(*args, **kwargs): ctx.method_args = args ctx.method_kwargs = kwargs ctx.method_return = outputs - - # parse args, kwargs to get dictionary with defaults filled in - ctx.parsed_args = parse_method_args(method, ctx.method_args, ctx.method_kwargs) #print('%s : %s' % (method.__qualname__, converter.__name__)) converter(ctx) From 5abcbbee43b78660ea173e5b118e7f5a2748db53 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 26 Sep 2019 19:39:10 -0700 Subject: [PATCH 174/355] fixed normalize --- torch2trt/converters/normalize.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/torch2trt/converters/normalize.py b/torch2trt/converters/normalize.py index 18e8277f..718662f0 100644 --- a/torch2trt/converters/normalize.py +++ b/torch2trt/converters/normalize.py @@ -4,15 +4,15 @@ @tensorrt_converter('torch.nn.functional.normalize') def convert_normalize(ctx): - input = ctx.method_args[0] + # get args + input = get_arg(ctx, 'input', pos=0, default=None) + p = get_arg(ctx, 'p', pos=1, default=2) + dim = get_arg(ctx, 'dim', pos=2, default=1) + eps = get_arg(ctx, 'eps', pos=3, default=1e-12) + input_trt = get_or_create_trt_tensor(ctx.network, input) output = ctx.method_return - # get power - p = ctx.parsed_args['p'] - dim = ctx.parsed_args['dim'] - eps = ctx.parsed_args['eps'] - # add broadcastable scalar constants to network scalar_shape = (1,) * len(input.shape) eps_trt = get_or_create_trt_tensor(ctx.network, eps * torch.ones(scalar_shape, dtype=input.dtype)) From d47af74b2182afca29458f1ac9de7b58657e5496 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 26 Sep 2019 19:41:45 -0700 Subject: [PATCH 175/355] added missing avg_pool2d.py --- torch2trt/converters/avg_pool2d.py | 54 ++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 torch2trt/converters/avg_pool2d.py diff --git a/torch2trt/converters/avg_pool2d.py b/torch2trt/converters/avg_pool2d.py new file mode 100644 index 00000000..8fb763e2 --- /dev/null +++ b/torch2trt/converters/avg_pool2d.py @@ -0,0 +1,54 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +@tensorrt_converter('torch.nn.functional.avg_pool2d') +def convert_avg_pool2d(ctx): + # parse args + input = get_arg(ctx, 'input', pos=0, default=None) + kernel_size = get_arg(ctx, 'kernel_size', pos=1, default=None) + stride = get_arg(ctx, 'stride', pos=2, default=None) + padding = get_arg(ctx, 'padding', pos=3, default=0) + ceil_mode = get_arg(ctx, 'ceil_mode', pos=4, default=True) + count_include_pad = get_arg(ctx, 'count_include_pad', pos=5, default=True) + + # get input trt tensor (or create constant if it doesn't exist) + input_trt = get_or_create_trt_tensor(ctx.network, input) + + output = ctx.method_return + + # get kernel size + if not 
isinstance(kernel_size, tuple): + kernel_size = (kernel_size, ) * 2 + + # get stride + if not isinstance(stride, tuple): + stride = (stride, ) * 2 + + # get padding + if not isinstance(padding, tuple): + padding = (padding, ) * 2 + + layer = ctx.network.add_pooling( + input=input_trt, type=trt.PoolingType.AVERAGE, window_size=kernel_size) + + layer.stride = stride + layer.padding = padding + layer.average_count_excludes_padding = not count_include_pad + + if ceil_mode: + layer.padding_mode = trt.PaddingMode.EXPLICIT_ROUND_UP + + output._trt = layer.get_output(0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 6)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 5, 7)]) +def test_avg_pool2d_without_ceil_mode(): + return torch.nn.AvgPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=False) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 6)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 5, 7)]) +def test_avg_pool2d_with_ceil_mode(): + return torch.nn.AvgPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True, count_include_pad=False) # TRT does not support ceil_mode=True && count_include_pad=True From 916e25dcf27655b5099ee14a7cb322517bd63132 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 26 Sep 2019 19:45:52 -0700 Subject: [PATCH 176/355] added max_pool2d removed MaxPool2d --- torch2trt/converters/__init__.py | 2 +- .../{MaxPool2d.py => max_pool2d.py} | 34 ++++++++++++------- 2 files changed, 23 insertions(+), 13 deletions(-) rename torch2trt/converters/{MaxPool2d.py => max_pool2d.py} (59%) diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index c78bad07..b0609df9 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -15,7 +15,7 @@ from .Identity import * from .Linear import * from .LogSoftmax import * -from .MaxPool2d import * +from .max_pool2d import * from .normalize import * from .pad import * from .relu import * diff --git a/torch2trt/converters/MaxPool2d.py b/torch2trt/converters/max_pool2d.py similarity index 59% rename from torch2trt/converters/MaxPool2d.py rename to torch2trt/converters/max_pool2d.py index 466c356c..45894c2b 100644 --- a/torch2trt/converters/MaxPool2d.py +++ b/torch2trt/converters/max_pool2d.py @@ -2,34 +2,44 @@ from torch2trt.module_test import add_module_test -@tensorrt_converter('torch.nn.MaxPool2d.forward') -def convert_MaxPool2d(ctx): - module = ctx.method_args[0] - input = ctx.method_args[1] +@tensorrt_converter('torch.nn.functional.max_pool2d') +def convert_max_pool2d(ctx): + # parse args + input = get_arg(ctx, 'input', pos=0, default=None) + kernel_size = get_arg(ctx, 'kernel_size', pos=1, default=None) + stride = get_arg(ctx, 'stride', pos=2, default=None) + padding = get_arg(ctx, 'padding', pos=3, default=0) + ceil_mode = get_arg(ctx, 'ceil_mode', pos=4, default=True) + + # get input trt tensor (or create constant if it doesn't exist) + input_trt = get_or_create_trt_tensor(ctx.network, input) + output = ctx.method_return - kernel_size = module.kernel_size + # get kernel size if not isinstance(kernel_size, tuple): kernel_size = (kernel_size, ) * 2 - stride = module.stride + # get stride if not isinstance(stride, tuple): stride = (stride, ) * 2 - padding = module.padding + # get padding if not isinstance(padding, tuple): padding = (padding, ) * 2 layer = ctx.network.add_pooling( - input=input._trt, type=trt.PoolingType.MAX, window_size=kernel_size) + input=input_trt, type=trt.PoolingType.MAX, 
window_size=kernel_size) + layer.stride = stride layer.padding = padding - if module.ceil_mode: + + if ceil_mode: layer.padding_mode = trt.PaddingMode.EXPLICIT_ROUND_UP output._trt = layer.get_output(0) - - + + @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 6)]) @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 5, 7)]) def test_MaxPool2d_without_ceil_mode(): @@ -39,4 +49,4 @@ def test_MaxPool2d_without_ceil_mode(): @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 6)]) @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 5, 7)]) def test_MaxPool2d_with_ceil_mode(): - return torch.nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True) + return torch.nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True) \ No newline at end of file From 100bcdb1a5708cad7eaf48ca7fc7eeb072212885 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Fri, 27 Sep 2019 11:25:06 -0700 Subject: [PATCH 177/355] fixed arg parsing in avg_pool2d and max_pool2d --- torch2trt/converters/avg_pool2d.py | 2 +- torch2trt/converters/max_pool2d.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/torch2trt/converters/avg_pool2d.py b/torch2trt/converters/avg_pool2d.py index 8fb763e2..a1b48a5f 100644 --- a/torch2trt/converters/avg_pool2d.py +++ b/torch2trt/converters/avg_pool2d.py @@ -9,7 +9,7 @@ def convert_avg_pool2d(ctx): kernel_size = get_arg(ctx, 'kernel_size', pos=1, default=None) stride = get_arg(ctx, 'stride', pos=2, default=None) padding = get_arg(ctx, 'padding', pos=3, default=0) - ceil_mode = get_arg(ctx, 'ceil_mode', pos=4, default=True) + ceil_mode = get_arg(ctx, 'ceil_mode', pos=4, default=False) count_include_pad = get_arg(ctx, 'count_include_pad', pos=5, default=True) # get input trt tensor (or create constant if it doesn't exist) diff --git a/torch2trt/converters/max_pool2d.py b/torch2trt/converters/max_pool2d.py index 45894c2b..c65f66b7 100644 --- a/torch2trt/converters/max_pool2d.py +++ b/torch2trt/converters/max_pool2d.py @@ -9,7 +9,8 @@ def convert_max_pool2d(ctx): kernel_size = get_arg(ctx, 'kernel_size', pos=1, default=None) stride = get_arg(ctx, 'stride', pos=2, default=None) padding = get_arg(ctx, 'padding', pos=3, default=0) - ceil_mode = get_arg(ctx, 'ceil_mode', pos=4, default=True) + dilation = get_arg(ctx, 'dilation', pos=4, default=1) + ceil_mode = get_arg(ctx, 'ceil_mode', pos=5, default=False) # get input trt tensor (or create constant if it doesn't exist) input_trt = get_or_create_trt_tensor(ctx.network, input) From 46534bdfa131b22e9d8d1358515d655839209b05 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sun, 29 Sep 2019 15:14:28 -0700 Subject: [PATCH 178/355] refactor --- torch2trt/converters/avg_pool2d.py | 2 +- torch2trt/converters/max_pool2d.py | 2 +- torch2trt/converters/normalize.py | 8 ++++---- torch2trt/torch2trt.py | 24 ++++++++---------------- 4 files changed, 14 insertions(+), 22 deletions(-) diff --git a/torch2trt/converters/avg_pool2d.py b/torch2trt/converters/avg_pool2d.py index a1b48a5f..897b5870 100644 --- a/torch2trt/converters/avg_pool2d.py +++ b/torch2trt/converters/avg_pool2d.py @@ -13,7 +13,7 @@ def convert_avg_pool2d(ctx): count_include_pad = get_arg(ctx, 'count_include_pad', pos=5, default=True) # get input trt tensor (or create constant if it doesn't exist) - input_trt = get_or_create_trt_tensor(ctx.network, input) + input_trt = input._trt output = ctx.method_return diff --git a/torch2trt/converters/max_pool2d.py b/torch2trt/converters/max_pool2d.py index c65f66b7..18956b23 100644 --- 
a/torch2trt/converters/max_pool2d.py +++ b/torch2trt/converters/max_pool2d.py @@ -13,7 +13,7 @@ def convert_max_pool2d(ctx): ceil_mode = get_arg(ctx, 'ceil_mode', pos=5, default=False) # get input trt tensor (or create constant if it doesn't exist) - input_trt = get_or_create_trt_tensor(ctx.network, input) + input_trt = input._trt output = ctx.method_return diff --git a/torch2trt/converters/normalize.py b/torch2trt/converters/normalize.py index 718662f0..f462dfea 100644 --- a/torch2trt/converters/normalize.py +++ b/torch2trt/converters/normalize.py @@ -10,14 +10,14 @@ def convert_normalize(ctx): dim = get_arg(ctx, 'dim', pos=2, default=1) eps = get_arg(ctx, 'eps', pos=3, default=1e-12) - input_trt = get_or_create_trt_tensor(ctx.network, input) + input_trt = input._trt output = ctx.method_return # add broadcastable scalar constants to network scalar_shape = (1,) * len(input.shape) - eps_trt = get_or_create_trt_tensor(ctx.network, eps * torch.ones(scalar_shape, dtype=input.dtype)) - p_trt = get_or_create_trt_tensor(ctx.network, p * torch.ones(scalar_shape, dtype=input.dtype)) - p_inv_trt = get_or_create_trt_tensor(ctx.network, torch.ones(scalar_shape, dtype=input.dtype) / p) + eps_trt = add_trt_constant(ctx.network, eps * torch.ones(scalar_shape, dtype=input.dtype)) + p_trt = add_trt_constant(ctx.network, p * torch.ones(scalar_shape, dtype=input.dtype)) + p_inv_trt = add_trt_constant(ctx.network, torch.ones(scalar_shape, dtype=input.dtype) / p) # compute norm = sum(abs(x)**p, dim=dim)**(1./p) norm = ctx.network.add_unary(input_trt, trt.UnaryOperation.ABS).get_output(0) diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index c0fc9edd..9f9317da 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -78,21 +78,14 @@ def torch_dim_to_trt_axes(dim): axes |= 1 << (d - 1) # -1 to remove batch dimension return axes - - -def get_or_create_trt_tensor(network, tensor): - """Adds tensor as constant to network and sets _trt attribute. 
Returns _trt attribute""" - # only add if not already done - if not hasattr(tensor, '_trt'): - shape = tuple(tensor.shape[1:]) # exclude batch dimension - array = tensor[0].detach().cpu().numpy() - - layer = network.add_constant(shape, array) - tensor._trt = layer.get_output(0) - - return tensor._trt +def add_trt_constant(network, tensor): + shape = tuple(tensor.shape[1:]) + array = tensor[0].detach().cpu().numpy() + layer = network.add_constant(shape, array) + return layer.get_output(0) + # CONVERSION REGISTRY AND HOOKS @@ -122,13 +115,12 @@ def wrapper(*args, **kwargs): # run original method outputs = method(*args, **kwargs) - + if not skip: - # call conversion hook ctx.method_args = args ctx.method_kwargs = kwargs ctx.method_return = outputs - + #print('%s : %s' % (method.__qualname__, converter.__name__)) converter(ctx) From 05e70225dfd9cca53859104578db3f0ac27d6347 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sun, 29 Sep 2019 17:01:23 -0700 Subject: [PATCH 179/355] added split and chunk --- torch2trt/converters/__init__.py | 3 ++ torch2trt/converters/chunk.py | 60 ++++++++++++++++++++++++ torch2trt/converters/split.py | 79 ++++++++++++++++++++++++++++++++ 3 files changed, 142 insertions(+) create mode 100644 torch2trt/converters/chunk.py create mode 100644 torch2trt/converters/split.py diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index b0609df9..b2d7c23e 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -28,6 +28,9 @@ from .transpose import * from .mean import * from .softmax import * +from .split import * +from .chunk import * + try: from .interpolate import * diff --git a/torch2trt/converters/chunk.py b/torch2trt/converters/chunk.py new file mode 100644 index 00000000..eb870103 --- /dev/null +++ b/torch2trt/converters/chunk.py @@ -0,0 +1,60 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test +from .split import convert_split + + +@tensorrt_converter('torch.chunk') +@tensorrt_converter('torch.Tensor.chunk') +def convert_chunk(ctx): + convert_split(ctx) + + +class TorchChunk(torch.nn.Module): + + def __init__(self, *args, **kwargs): + super(TorchChunk, self).__init__() + self.args = args + self.kwargs = kwargs + + def forward(self, x): + return torch.chunk(x, *self.args, **self.kwargs) + + +class TensorChunk(torch.nn.Module): + + def __init__(self, *args, **kwargs): + super(TensorChunk, self).__init__() + self.args = args + self.kwargs = kwargs + + def forward(self, x): + return x.chunk(*self.args, **self.kwargs) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +def test_torch_chunk_1_1(): + return TorchChunk(1, 1) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +def test_torch_chunk_2_1(): + return TorchChunk(2, 1) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +def test_torch_chunk_3_1(): + return TorchChunk(3, 1) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +def test_torch_chunk_3_2(): + return TorchChunk(3, 2) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +def test_tensor_chunk_3_2(): + return TensorChunk(3, 2) \ No newline at end of file diff --git 
a/torch2trt/converters/split.py b/torch2trt/converters/split.py new file mode 100644 index 00000000..22219c26 --- /dev/null +++ b/torch2trt/converters/split.py @@ -0,0 +1,79 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +@tensorrt_converter('torch.split') +@tensorrt_converter('torch.Tensor.split') +def convert_split(ctx): + input = get_arg(ctx, 'input', 0, None) + # we don't need to parse split/chunk (arg 1) + # since we infer size from output tensors + dim = get_arg(ctx, 'dim', 2, 0) + + outputs = ctx.method_return + + assert(dim >= 1) + + start = [0] * len(input.shape[1:]) # exclude batch + stride = [1] * len(start) + offset = 0 + trt_dim = dim - 1 + + # add slice layers + for i, output in enumerate(outputs): + shape = list(output.shape[1:]) # exclude batch dim + start[trt_dim] = offset + layer = ctx.network.add_slice(input._trt, start=start, shape=shape, stride=stride) + output._trt = layer.get_output(0) + offset = offset + shape[trt_dim] + + +class TorchSplit(torch.nn.Module): + + def __init__(self, *args, **kwargs): + super(TorchSplit, self).__init__() + self.args = args + self.kwargs = kwargs + + def forward(self, x): + return torch.split(x, *self.args, **self.kwargs) + + +class TensorSplit(torch.nn.Module): + + def __init__(self, *args, **kwargs): + super(TensorSplit, self).__init__() + self.args = args + self.kwargs = kwargs + + def forward(self, x): + return x.split(*self.args, **self.kwargs) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +def test_torch_split_1_1(): + return TorchSplit(1, 1) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +def test_torch_split_2_1(): + return TorchSplit(2, 1) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +def test_torch_split_3_1(): + return TorchSplit(3, 1) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +def test_torch_split_3_2(): + return TorchSplit(3, 2) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +def test_tensor_split_3_2(): + return TensorSplit(3, 2) \ No newline at end of file From d6a3fd59b689b082873fc28daa3b9184ad018379 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sun, 29 Sep 2019 17:07:00 -0700 Subject: [PATCH 180/355] added shufflenet to module tests --- torch2trt/module_test.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/torch2trt/module_test.py b/torch2trt/module_test.py index e9c1c48e..2df0da61 100644 --- a/torch2trt/module_test.py +++ b/torch2trt/module_test.py @@ -36,6 +36,10 @@ def module_name(self): ModuleTest(torchvision.models.vgg16_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), ModuleTest(torchvision.models.vgg19_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), ModuleTest(torchvision.models.mobilenet_v2, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.shufflenet_v2_x0_5, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.shufflenet_v2_x1_0, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.shufflenet_v2_x1_5, torch.float16, 
torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.shufflenet_v2_x2_0, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), ] From a838a191c2183a889ac10dd28e3045e6bab78450 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sun, 29 Sep 2019 20:42:11 -0700 Subject: [PATCH 181/355] added permute --- torch2trt/converters/__init__.py | 1 + torch2trt/converters/permute.py | 46 ++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 torch2trt/converters/permute.py diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index b2d7c23e..dfc74a7f 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -18,6 +18,7 @@ from .max_pool2d import * from .normalize import * from .pad import * +from .permute import * from .relu import * from .ReLU import * from .relu6 import * diff --git a/torch2trt/converters/permute.py b/torch2trt/converters/permute.py new file mode 100644 index 00000000..10e3dc7c --- /dev/null +++ b/torch2trt/converters/permute.py @@ -0,0 +1,46 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +@tensorrt_converter('torch.Tensor.permute') +def convert_transpose(ctx): + input = ctx.method_args[0] + output = ctx.method_return + + # permutation -1 because TRT does not include batch dim + permutation = ctx.method_args[1:] + assert(permutation[0] == 0) # cannot move batch dim + + trt_permutation = tuple([p - 1 for p in permutation])[1:] + + layer = ctx.network.add_shuffle(input._trt) + layer.second_transpose = tuple(trt_permutation) + + output._trt = layer.get_output(0) + + +class Permute(torch.nn.Module): + def __init__(self, *args): + super(Permute, self).__init__() + self.args = args + def forward(self, x): + return x.permute(*self.args).contiguous() + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5)]) +def test_permute_2d_0123(): + return Permute(0, 1, 2, 3) + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5)]) +def test_permute_2d_0312(): + return Permute(0, 3, 1, 2) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5, 6)]) +def test_permute_3d_01234(): + return Permute(0, 1, 2, 3, 4) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5, 6)]) +def test_permute_3d_04132(): + return Permute(0, 4, 1, 3, 2) From 7b46e02ce2ead5229ea6d079f6c643265ccb0c87 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sun, 29 Sep 2019 20:46:14 -0700 Subject: [PATCH 182/355] fixed permute converter name --- torch2trt/converters/permute.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch2trt/converters/permute.py b/torch2trt/converters/permute.py index 10e3dc7c..e37c1461 100644 --- a/torch2trt/converters/permute.py +++ b/torch2trt/converters/permute.py @@ -3,7 +3,7 @@ @tensorrt_converter('torch.Tensor.permute') -def convert_transpose(ctx): +def convert_permute(ctx): input = ctx.method_args[0] output = ctx.method_return From 72f0cd7d8d3b16527c37e08235e578f6ddf56f56 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Mon, 30 Sep 2019 20:19:05 -0700 Subject: [PATCH 183/355] added trt_ method to get/create/broadcast tensors --- torch2trt/torch2trt.py | 67 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index 9f9317da..f36ae6a6 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -87,6 +87,73 @@ def add_trt_constant(network, tensor): 
 return layer.get_output(0)
 
 
+def check_torch_dtype(*tensors):
+    dtype = None
+    for t in tensors:
+        if isinstance(t, torch.Tensor):
+            if dtype is None:
+                dtype = t.dtype
+            else:
+                assert(dtype == t.dtype)#, 'Tensor data types must match')
+    assert(dtype is not None)#, 'Data type could not be inferred from any item in list')
+    return dtype
+
+
+def trt_(network, *tensors):
+    """Creates missing TensorRT tensors and adds shuffle layers to make tensors broadcastable"""
+    trt_tensors = [None] * len(tensors)
+
+    dtype = check_torch_dtype(*tensors)
+
+    # get broadcast dimension
+    broadcast_num_dim = 0
+    for t in tensors:
+        if isinstance(t, torch.Tensor):
+            num_dim = len(t.shape[1:])  # exclude batch
+            if num_dim > broadcast_num_dim:
+                broadcast_num_dim = num_dim
+
+
+    for i, t in enumerate(tensors):
+        trt_tensor = None
+
+        # GET TRT TENSOR (OR CREATE TRT CONSTANT)
+
+        # get tensor w/ _trt
+        if isinstance(t, torch.Tensor) and hasattr(t, '_trt'):
+            trt_tensor = t._trt
+
+        # or... add constant for leaf tensor w/o _trt
+        elif isinstance(t, torch.Tensor) and t.is_leaf and not hasattr(t, '_trt'):
+            # add leaf tensor
+            shape = tuple(t.shape[1:])
+            weight = t[0].detach().cpu().numpy()
+            t._trt = network.add_constant(shape, weight).get_output(0)
+            trt_tensor = t._trt
+
+        # or... add constant for scalar primitive
+        elif isinstance(t, float) or isinstance(t, int):
+            shape = (1,) * broadcast_num_dim
+            scalar = t * torch.ones(shape, dtype=dtype).cpu().numpy()
+            trt_tensor = network.add_constant(shape, scalar).get_output(0)
+
+        assert(trt_tensor is not None)#, 'TensorRT tensor could not be created'
+
+        # MAKE TRT TENSOR BROADCASTABLE IF IT IS NOT ALREADY
+
+        if len(trt_tensor.shape) != broadcast_num_dim:
+            # append 1 size dims to front
+            diff = broadcast_num_dim - len(trt_tensor.shape)
+            shape = tuple([1] * diff + list(trt_tensor.shape))
+            layer = network.add_shuffle(trt_tensor)
+            layer.reshape_dims = shape
+            trt_tensor = layer.get_output(0)
+
+        trt_tensors[i] = trt_tensor
+
+    return tuple(trt_tensors)
+
+
 # CONVERSION REGISTRY AND HOOKS

From 0bfe27df7628b009ec2ebc990bc5ba2304ce1d7a Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Mon, 30 Sep 2019 20:21:19 -0700
Subject: [PATCH 184/355] modified normalize to use trt_

---
 torch2trt/converters/normalize.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/torch2trt/converters/normalize.py b/torch2trt/converters/normalize.py
index f462dfea..2bdf8214 100644
--- a/torch2trt/converters/normalize.py
+++ b/torch2trt/converters/normalize.py
@@ -10,14 +10,11 @@ def convert_normalize(ctx):
     dim = get_arg(ctx, 'dim', pos=2, default=1)
     eps = get_arg(ctx, 'eps', pos=3, default=1e-12)
 
-    input_trt = input._trt
+#     input_trt = input._trt
     output = ctx.method_return
 
     # add broadcastable scalar constants to network
-    scalar_shape = (1,) * len(input.shape)
-    eps_trt = add_trt_constant(ctx.network, eps * torch.ones(scalar_shape, dtype=input.dtype))
-    p_trt = add_trt_constant(ctx.network, p * torch.ones(scalar_shape, dtype=input.dtype))
-    p_inv_trt = add_trt_constant(ctx.network, torch.ones(scalar_shape, dtype=input.dtype) / p)
+    input_trt, eps_trt, p_trt, p_inv_trt = trt_(ctx.network, input, eps, p, 1.0 / p)
 
     # compute norm = sum(abs(x)**p, dim=dim)**(1./p)
     norm = ctx.network.add_unary(input_trt, trt.UnaryOperation.ABS).get_output(0)

From 5699dcdfde68234a9e68cdc8ab6af7cc35fef8b6 Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Mon, 30 Sep 2019 21:18:37 -0700
Subject: [PATCH 185/355] fixed trt_ to handle single tensor outputs

---
 torch2trt/torch2trt.py | 5 ++++-
 1
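The trt_ helper above is the basis for the converter cleanup in the patches that follow: a converter can pass any mix of torch.Tensor operands and plain Python scalars, and it receives back TensorRT ITensors of matching rank, with constant and shuffle layers created on demand. A minimal sketch of the resulting converter pattern, using a hypothetical subtraction converter as the example (trt.ElementWiseOperation.SUB is a standard TensorRT enum, but this particular converter is illustrative and not part of the series at this point):

    @tensorrt_converter('torch.Tensor.__sub__')  # hypothetical example op
    def convert_sub(ctx):
        input_a = ctx.method_args[0]
        input_b = ctx.method_args[1]  # may be a Tensor or a plain int/float
        # trt_ returns broadcast-ready ITensors, adding constant/shuffle layers as needed
        input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b)
        output = ctx.method_return
        layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.SUB)
        output._trt = layer.get_output(0)

This is exactly the shape that the add, mul, and div converters take in the patches below.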
file changed, 4 insertions(+), 1 deletion(-) diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index f36ae6a6..46a05778 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -151,7 +151,10 @@ def trt_(network, *tensors): trt_tensors[i] = trt_tensor - return tuple(trt_tensors) + if len(trt_tensors) == 1: + return trt_tensors[0] + else: + return tuple(trt_tensors) # CONVERSION REGISTRY AND HOOKS From 4c21936e4999e380706d8ab570859de269d158c6 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Mon, 30 Sep 2019 21:19:21 -0700 Subject: [PATCH 186/355] updated converters to use trt_ instead of input._trt --- torch2trt/converters/AdaptiveAvgPool2d.py | 6 ++++-- torch2trt/converters/BatchNorm1d.py | 3 ++- torch2trt/converters/BatchNorm2d.py | 3 ++- torch2trt/converters/Conv1d.py | 3 ++- torch2trt/converters/Conv2d.py | 3 ++- torch2trt/converters/ConvTranspose2d.py | 3 ++- torch2trt/converters/Identity.py | 3 ++- torch2trt/converters/Linear.py | 3 ++- torch2trt/converters/LogSoftmax.py | 3 ++- torch2trt/converters/ReLU.py | 3 ++- torch2trt/converters/ReLU6.py | 17 +++++++++++------ torch2trt/converters/add.py | 3 ++- torch2trt/converters/avg_pool2d.py | 2 +- torch2trt/converters/cat.py | 2 +- torch2trt/converters/clamp.py | 3 ++- torch2trt/converters/div.py | 3 ++- torch2trt/converters/identity.py | 3 ++- torch2trt/converters/max_pool2d.py | 2 +- torch2trt/converters/mean.py | 3 ++- torch2trt/converters/mul.py | 3 ++- torch2trt/converters/pad.py | 3 ++- torch2trt/converters/permute.py | 3 ++- torch2trt/converters/sigmoid.py | 3 ++- torch2trt/converters/softmax.py | 3 ++- torch2trt/converters/split.py | 3 ++- torch2trt/converters/tanh.py | 3 ++- torch2trt/converters/transpose.py | 3 ++- torch2trt/converters/view.py | 3 ++- 28 files changed, 64 insertions(+), 34 deletions(-) diff --git a/torch2trt/converters/AdaptiveAvgPool2d.py b/torch2trt/converters/AdaptiveAvgPool2d.py index ea865156..b92fb975 100644 --- a/torch2trt/converters/AdaptiveAvgPool2d.py +++ b/torch2trt/converters/AdaptiveAvgPool2d.py @@ -7,16 +7,18 @@ def convert_AdaptiveAvgPool2d(ctx): module = ctx.method_args[0] input = ctx.method_args[1] output = ctx.method_return + + input_trt = trt_(ctx.network, input) output_size = module.output_size if not isinstance(output_size, tuple): output_size = (output_size, ) * 2 - stride = (input._trt.shape[-2] // output_size[-2], input._trt.shape[-1] // output_size[-1]) + stride = (input_trt.shape[-2] // output_size[-2], input_trt.shape[-1] // output_size[-1]) kernel_size = stride layer = ctx.network.add_pooling( - input=input._trt, type=trt.PoolingType.AVERAGE, window_size=kernel_size) + input=input_trt, type=trt.PoolingType.AVERAGE, window_size=kernel_size) layer.stride = stride output._trt = layer.get_output(0) diff --git a/torch2trt/converters/BatchNorm1d.py b/torch2trt/converters/BatchNorm1d.py index 51a7bf30..89bda117 100644 --- a/torch2trt/converters/BatchNorm1d.py +++ b/torch2trt/converters/BatchNorm1d.py @@ -6,6 +6,7 @@ def convert_BatchNorm2d(ctx): module = ctx.method_args[0] input = ctx.method_args[1] + input_trt = trt_(ctx.network, input) output = ctx.method_return scale = module.weight.detach().cpu().numpy() / np.sqrt(module.running_var.detach().cpu().numpy() + module.eps) @@ -13,7 +14,7 @@ def convert_BatchNorm2d(ctx): power = np.ones_like(scale) # reshape to 2D - layer = ctx.network.add_shuffle(input._trt) + layer = ctx.network.add_shuffle(input_trt) if len(input.shape) == 2: layer.reshape_dims = (input.shape[1], 1, 1) diff --git a/torch2trt/converters/BatchNorm2d.py 
b/torch2trt/converters/BatchNorm2d.py index 66ab7c55..0dff8486 100644 --- a/torch2trt/converters/BatchNorm2d.py +++ b/torch2trt/converters/BatchNorm2d.py @@ -5,12 +5,13 @@ def convert_BatchNorm2d(ctx): module = ctx.method_args[0] input = ctx.method_args[1] + input_trt = trt_(ctx.network, input) output = ctx.method_return scale = module.weight.detach().cpu().numpy() / np.sqrt(module.running_var.detach().cpu().numpy() + module.eps) bias = module.bias.detach().cpu().numpy() - module.running_mean.detach().cpu().numpy() * scale power = np.ones_like(scale) - layer = ctx.network.add_scale(input._trt, trt.ScaleMode.CHANNEL, bias, scale, power) + layer = ctx.network.add_scale(input_trt, trt.ScaleMode.CHANNEL, bias, scale, power) output._trt = layer.get_output(0) \ No newline at end of file diff --git a/torch2trt/converters/Conv1d.py b/torch2trt/converters/Conv1d.py index c65502e0..fe6cf189 100644 --- a/torch2trt/converters/Conv1d.py +++ b/torch2trt/converters/Conv1d.py @@ -6,6 +6,7 @@ def convert_Conv1d(ctx): module = ctx.method_args[0] input = ctx.method_args[1] + input_trt = trt_(ctx.network, input) output = ctx.method_return kernel_size = (module.kernel_size[0], 1) @@ -20,7 +21,7 @@ def convert_Conv1d(ctx): bias = module.bias.detach().cpu().numpy() # reshape to 2D - layer = ctx.network.add_shuffle(input._trt) + layer = ctx.network.add_shuffle(input_trt) layer.reshape_dims = (-1, input.shape[-1], 1) layer = ctx.network.add_convolution( diff --git a/torch2trt/converters/Conv2d.py b/torch2trt/converters/Conv2d.py index 160758dc..5bd6ec85 100644 --- a/torch2trt/converters/Conv2d.py +++ b/torch2trt/converters/Conv2d.py @@ -6,6 +6,7 @@ def convert_Conv2d(ctx): module = ctx.method_args[0] input = ctx.method_args[1] + input_trt = trt_(ctx.network, input) output = ctx.method_return kernel_size = module.kernel_size @@ -31,7 +32,7 @@ def convert_Conv2d(ctx): bias = module.bias.detach().cpu().numpy() layer = ctx.network.add_convolution( - input=input._trt, + input=input_trt, num_output_maps=module.out_channels, kernel_shape=kernel_size, kernel=kernel, diff --git a/torch2trt/converters/ConvTranspose2d.py b/torch2trt/converters/ConvTranspose2d.py index 700dc232..1b9af097 100644 --- a/torch2trt/converters/ConvTranspose2d.py +++ b/torch2trt/converters/ConvTranspose2d.py @@ -5,6 +5,7 @@ def convert_ConvTranspose2d(ctx): module = ctx.method_args[0] input = ctx.method_args[1] + input_trt = trt_(ctx.network, input) output = ctx.method_return kernel_size = module.kernel_size @@ -26,7 +27,7 @@ def convert_ConvTranspose2d(ctx): bias = module.bias.detach().cpu().numpy() layer = ctx.network.add_deconvolution( - input=input._trt, + input=input_trt, num_output_maps=module.out_channels, kernel_shape=kernel_size, kernel=kernel, diff --git a/torch2trt/converters/Identity.py b/torch2trt/converters/Identity.py index 0cdab1f1..761aff50 100644 --- a/torch2trt/converters/Identity.py +++ b/torch2trt/converters/Identity.py @@ -6,5 +6,6 @@ @tensorrt_converter('torch.nn.Dropout3d.forward') def convert_Identity(ctx): input = ctx.method_args[1] + input_trt = trt_(ctx.network, input) output = ctx.method_return - output._trt = input._trt \ No newline at end of file + output._trt = input_trt \ No newline at end of file diff --git a/torch2trt/converters/Linear.py b/torch2trt/converters/Linear.py index 5d495f5e..28534478 100644 --- a/torch2trt/converters/Linear.py +++ b/torch2trt/converters/Linear.py @@ -6,10 +6,11 @@ def convert_Linear(ctx): module = ctx.method_args[0] input = ctx.method_args[1] + input_trt = trt_(ctx.network, input) 
output = ctx.method_return # reshape to Nx1x1 - layer = ctx.network.add_shuffle(input._trt) + layer = ctx.network.add_shuffle(input_trt) layer.reshape_dims = (-1, 1, 1) # add fully connected diff --git a/torch2trt/converters/LogSoftmax.py b/torch2trt/converters/LogSoftmax.py index 065ed658..38108780 100644 --- a/torch2trt/converters/LogSoftmax.py +++ b/torch2trt/converters/LogSoftmax.py @@ -4,8 +4,9 @@ @tensorrt_converter('torch.nn.LogSoftmax.forward') def convert_LogSoftmax(ctx): input = ctx.method_args[1] + input_trt = trt_(ctx.network, input) output = ctx.method_return - layer = ctx.network.add_softmax(input=input._trt) + layer = ctx.network.add_softmax(input=input_trt) layer = ctx.network.add_unary(input=layer.get_output(0), op=trt.UnaryOperation.LOG) output._trt = layer.get_output(0) \ No newline at end of file diff --git a/torch2trt/converters/ReLU.py b/torch2trt/converters/ReLU.py index ffa9d4ca..e10feb27 100644 --- a/torch2trt/converters/ReLU.py +++ b/torch2trt/converters/ReLU.py @@ -4,7 +4,8 @@ @tensorrt_converter('torch.nn.ReLU.forward') def convert_ReLU(ctx): input = ctx.method_args[1] + input_trt = trt_(ctx.network, input) output = ctx.method_return layer = ctx.network.add_activation( - input=input._trt, type=trt.ActivationType.RELU) + input=input_trt, type=trt.ActivationType.RELU) output._trt = layer.get_output(0) \ No newline at end of file diff --git a/torch2trt/converters/ReLU6.py b/torch2trt/converters/ReLU6.py index b2aacf20..7c9ff588 100644 --- a/torch2trt/converters/ReLU6.py +++ b/torch2trt/converters/ReLU6.py @@ -1,17 +1,22 @@ from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test @tensorrt_converter('torch.nn.ReLU6.forward') def convert_ReLU6(ctx): input = ctx.method_args[1] output = ctx.method_return + + input_trt, trt_6 = trt_(ctx.network, input, 6) layer = ctx.network.add_activation( - input=input._trt, type=trt.ActivationType.RELU) - shape = (1, ) * len(input._trt.shape) # broadcast all dimensions - tensor = 6.0 * torch.ones(shape, dtype=torch_dtype_from_trt(input._trt.dtype)).cpu().numpy() - trt_6 = ctx.network.add_constant(shape, tensor) + input=input_trt, type=trt.ActivationType.RELU) layer = ctx.network.add_elementwise( - layer.get_output(0), trt_6.get_output(0), trt.ElementWiseOperation.MIN) + layer.get_output(0), trt_6, trt.ElementWiseOperation.MIN) - output._trt = layer.get_output(0) \ No newline at end of file + output._trt = layer.get_output(0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5)]) +def test_relu6_basic(): + return torch.nn.ReLU6() \ No newline at end of file diff --git a/torch2trt/converters/add.py b/torch2trt/converters/add.py index 3a51aa25..21cf9a47 100644 --- a/torch2trt/converters/add.py +++ b/torch2trt/converters/add.py @@ -8,8 +8,9 @@ def convert_add(ctx): input_a = ctx.method_args[0] input_b = ctx.method_args[1] + input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b) output = ctx.method_return - layer = ctx.network.add_elementwise(input_a._trt, input_b._trt, trt.ElementWiseOperation.SUM) + layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.SUM) output._trt = layer.get_output(0) diff --git a/torch2trt/converters/avg_pool2d.py b/torch2trt/converters/avg_pool2d.py index 897b5870..0b905d01 100644 --- a/torch2trt/converters/avg_pool2d.py +++ b/torch2trt/converters/avg_pool2d.py @@ -13,7 +13,7 @@ def convert_avg_pool2d(ctx): count_include_pad = get_arg(ctx, 'count_include_pad', pos=5, default=True) # get input trt tensor (or create constant 
if it doesn't exist) - input_trt = input._trt + input_trt = trt_(ctx.network, input) output = ctx.method_return diff --git a/torch2trt/converters/cat.py b/torch2trt/converters/cat.py index 706dc032..39f5b41d 100644 --- a/torch2trt/converters/cat.py +++ b/torch2trt/converters/cat.py @@ -11,7 +11,7 @@ def convert_cat(ctx): dim = ctx.method_args[1] output = ctx.method_return - trt_inputs = [i._trt for i in inputs] + trt_inputs = [trt_(ctx.network, i) for i in inputs] layer = ctx.network.add_concatenation(inputs=trt_inputs) layer.axis = dim - 1 diff --git a/torch2trt/converters/clamp.py b/torch2trt/converters/clamp.py index 50e1d3a0..d402b272 100644 --- a/torch2trt/converters/clamp.py +++ b/torch2trt/converters/clamp.py @@ -20,10 +20,11 @@ def __add_clamp(network, trt_input, val, op): @tensorrt_converter('torch.Tensor.clamp_min') def convert_clamp_min(ctx): input = ctx.method_args[0] + input_trt = trt_(ctx.network, input) val = ctx.method_args[1] output = ctx.method_return - layer = __add_clamp(ctx.network, input._trt, val, trt.ElementWiseOperation.MAX) + layer = __add_clamp(ctx.network, input_trt, val, trt.ElementWiseOperation.MAX) output._trt = layer.get_output(0) diff --git a/torch2trt/converters/div.py b/torch2trt/converters/div.py index df3fd89c..73f4c5f6 100644 --- a/torch2trt/converters/div.py +++ b/torch2trt/converters/div.py @@ -8,8 +8,9 @@ def convert_div(ctx): input_a = ctx.method_args[0] input_b = ctx.method_args[1] + input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b) output = ctx.method_return - layer = ctx.network.add_elementwise(input_a._trt, input_b._trt, trt.ElementWiseOperation.DIV) + layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.DIV) output._trt = layer.get_output(0) diff --git a/torch2trt/converters/identity.py b/torch2trt/converters/identity.py index 2b86f138..45a02fd9 100644 --- a/torch2trt/converters/identity.py +++ b/torch2trt/converters/identity.py @@ -7,5 +7,6 @@ @tensorrt_converter('torch.nn.functional.dropout3d') def convert_identity(ctx): input = ctx.method_args[0] + input_trt = trt_(ctx.network, input) output = ctx.method_return - output._trt = input._trt + output._trt = input_trt diff --git a/torch2trt/converters/max_pool2d.py b/torch2trt/converters/max_pool2d.py index 18956b23..e2fc9859 100644 --- a/torch2trt/converters/max_pool2d.py +++ b/torch2trt/converters/max_pool2d.py @@ -13,7 +13,7 @@ def convert_max_pool2d(ctx): ceil_mode = get_arg(ctx, 'ceil_mode', pos=5, default=False) # get input trt tensor (or create constant if it doesn't exist) - input_trt = input._trt + input_trt = trt_(ctx.network, input) output = ctx.method_return diff --git a/torch2trt/converters/mean.py b/torch2trt/converters/mean.py index 9e34e2d8..4ad2a9e1 100644 --- a/torch2trt/converters/mean.py +++ b/torch2trt/converters/mean.py @@ -6,6 +6,7 @@ @tensorrt_converter('torch.Tensor.mean') def convert_mean(ctx): input = ctx.method_args[0] + input_trt = trt_(ctx.network, input) output = ctx.method_return # get dims from args or kwargs @@ -34,7 +35,7 @@ def convert_mean(ctx): else: keep_dims = False - layer = ctx.network.add_reduce(input._trt, trt.ReduceOperation.AVG, axes, keep_dims) + layer = ctx.network.add_reduce(input_trt, trt.ReduceOperation.AVG, axes, keep_dims) output._trt = layer.get_output(0) diff --git a/torch2trt/converters/mul.py b/torch2trt/converters/mul.py index 53ee3b71..087a2ecb 100644 --- a/torch2trt/converters/mul.py +++ b/torch2trt/converters/mul.py @@ -8,8 +8,9 @@ def convert_mul(ctx): input_a = ctx.method_args[0] input_b 
= ctx.method_args[1] + input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b) output = ctx.method_return - layer = ctx.network.add_elementwise(input_a._trt, input_b._trt, trt.ElementWiseOperation.PROD) + layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.PROD) output._trt = layer.get_output(0) diff --git a/torch2trt/converters/pad.py b/torch2trt/converters/pad.py index 6d8ffeb8..51df5526 100644 --- a/torch2trt/converters/pad.py +++ b/torch2trt/converters/pad.py @@ -5,6 +5,7 @@ @tensorrt_converter('torch.nn.functional.pad') def convert_pad(ctx): input = ctx.method_args[0] + input_trt = trt_(ctx.network, input) output = ctx.method_return pad = ctx.method_args[1] @@ -13,7 +14,7 @@ def convert_pad(ctx): # mode / value are ignored since not supported by TensorRT - layer = ctx.network.add_padding(input._trt, pre_padding, post_padding) + layer = ctx.network.add_padding(input_trt, pre_padding, post_padding) output._trt = layer.get_output(0) diff --git a/torch2trt/converters/permute.py b/torch2trt/converters/permute.py index e37c1461..110c4e6b 100644 --- a/torch2trt/converters/permute.py +++ b/torch2trt/converters/permute.py @@ -5,6 +5,7 @@ @tensorrt_converter('torch.Tensor.permute') def convert_permute(ctx): input = ctx.method_args[0] + input_trt = trt_(ctx.network, input) output = ctx.method_return # permutation -1 because TRT does not include batch dim @@ -13,7 +14,7 @@ def convert_permute(ctx): trt_permutation = tuple([p - 1 for p in permutation])[1:] - layer = ctx.network.add_shuffle(input._trt) + layer = ctx.network.add_shuffle(input_trt) layer.second_transpose = tuple(trt_permutation) output._trt = layer.get_output(0) diff --git a/torch2trt/converters/sigmoid.py b/torch2trt/converters/sigmoid.py index ed75d5e9..2443cf74 100644 --- a/torch2trt/converters/sigmoid.py +++ b/torch2trt/converters/sigmoid.py @@ -6,9 +6,10 @@ @tensorrt_converter('torch.sigmoid') def convert_sigmoid(ctx): input = ctx.method_args[0] + input_trt = trt_(ctx.network, input) output = ctx.method_return - layer = ctx.network.add_activation(input._trt, trt.ActivationType.SIGMOID) + layer = ctx.network.add_activation(input_trt, trt.ActivationType.SIGMOID) output._trt = layer.get_output(0) diff --git a/torch2trt/converters/softmax.py b/torch2trt/converters/softmax.py index 9d174fce..74c56d72 100644 --- a/torch2trt/converters/softmax.py +++ b/torch2trt/converters/softmax.py @@ -5,6 +5,7 @@ @tensorrt_converter('torch.nn.functional.softmax') def convert_softmax(ctx): input = ctx.method_args[0] + input_trt = trt_(ctx.network, input) output = ctx.method_return # get dims from args or kwargs @@ -15,7 +16,7 @@ def convert_softmax(ctx): axes = 1 << (dim - 1) - layer = ctx.network.add_softmax(input=input._trt) + layer = ctx.network.add_softmax(input=input_trt) layer.axes = axes output._trt = layer.get_output(0) diff --git a/torch2trt/converters/split.py b/torch2trt/converters/split.py index 22219c26..23f964bd 100644 --- a/torch2trt/converters/split.py +++ b/torch2trt/converters/split.py @@ -6,6 +6,7 @@ @tensorrt_converter('torch.Tensor.split') def convert_split(ctx): input = get_arg(ctx, 'input', 0, None) + input_trt = trt_(ctx.network, input) # we don't need to parse split/chunk (arg 1) # since we infer size from output tensors dim = get_arg(ctx, 'dim', 2, 0) @@ -23,7 +24,7 @@ def convert_split(ctx): for i, output in enumerate(outputs): shape = list(output.shape[1:]) # exclude batch dim start[trt_dim] = offset - layer = ctx.network.add_slice(input._trt, start=start, shape=shape, 
stride=stride) + layer = ctx.network.add_slice(input_trt, start=start, shape=shape, stride=stride) output._trt = layer.get_output(0) offset = offset + shape[trt_dim] diff --git a/torch2trt/converters/tanh.py b/torch2trt/converters/tanh.py index 5eb5eef3..f6ec83fe 100644 --- a/torch2trt/converters/tanh.py +++ b/torch2trt/converters/tanh.py @@ -6,9 +6,10 @@ @tensorrt_converter('torch.tanh') def convert_tanh(ctx): input = ctx.method_args[0] + input_trt = trt_(ctx.network, input) output = ctx.method_return - layer = ctx.network.add_activation(input._trt, trt.ActivationType.TANH) + layer = ctx.network.add_activation(input_trt, trt.ActivationType.TANH) output._trt = layer.get_output(0) diff --git a/torch2trt/converters/transpose.py b/torch2trt/converters/transpose.py index 3c93fd4b..2ea5f7bb 100644 --- a/torch2trt/converters/transpose.py +++ b/torch2trt/converters/transpose.py @@ -5,6 +5,7 @@ @tensorrt_converter('torch.transpose') def convert_transpose(ctx): input = ctx.method_args[0] + input_trt = trt_(ctx.network, input) output = ctx.method_return # permutation -1 because TRT does not include batch dim permutation = list(range(len(input.shape) - 1)) @@ -12,7 +13,7 @@ def convert_transpose(ctx): dim1 = ctx.method_args[2] - 1 permutation[dim0] = dim1 permutation[dim1] = dim0 - layer = ctx.network.add_shuffle(input._trt) + layer = ctx.network.add_shuffle(input_trt) layer.second_transpose = tuple(permutation) output._trt = layer.get_output(0) diff --git a/torch2trt/converters/view.py b/torch2trt/converters/view.py index d5255239..0325a83e 100644 --- a/torch2trt/converters/view.py +++ b/torch2trt/converters/view.py @@ -7,8 +7,9 @@ @tensorrt_converter('torch.Tensor.view') def convert_view(ctx): input = ctx.method_args[0] + input_trt = trt_(ctx.network, input) output = ctx.method_return - layer = ctx.network.add_shuffle(input._trt) + layer = ctx.network.add_shuffle(input_trt) layer.reshape_dims = tuple(output.shape[1:]) output._trt = layer.get_output(0) From 9745e3172321c3540d612e78a17abb385890949d Mon Sep 17 00:00:00 2001 From: John Welsh Date: Mon, 30 Sep 2019 21:24:42 -0700 Subject: [PATCH 187/355] added __radd__ to support left hand primitives --- torch2trt/converters/add.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/torch2trt/converters/add.py b/torch2trt/converters/add.py index 21cf9a47..1392fbab 100644 --- a/torch2trt/converters/add.py +++ b/torch2trt/converters/add.py @@ -5,6 +5,7 @@ @tensorrt_converter('torch.add') @tensorrt_converter('torch.Tensor.__iadd__') @tensorrt_converter('torch.Tensor.__add__') +@tensorrt_converter('torch.Tensor.__radd__') def convert_add(ctx): input_a = ctx.method_args[0] input_b = ctx.method_args[1] @@ -50,4 +51,30 @@ def forward(self, x, y): @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) def test_add_torchadd(): - return TorchAdd() \ No newline at end of file + return TorchAdd() + + +class RAddInt(torch.nn.Module): + def __init__(self): + super(RAddInt, self).__init__() + + def forward(self, x): + return 1 + x + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) +def test_add_radd_int(): + return RAddInt() + + +class RAddFloat(torch.nn.Module): + def __init__(self): + super(RAddFloat, self).__init__() + + def forward(self, x): + return 1.0 + x + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) +def test_add_radd_float(): + return RAddFloat() \ No newline at end of file From 9bcadbb332f3ad98df5f0b0d8c9847afd6a4b524 
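Context for the reflected-operator patches: Python evaluates 1.0 + x by first trying float.__add__, which returns NotImplemented for a Tensor operand, and then falls back to x.__radd__(1.0). In that call the tensor is method_args[0] and the scalar is method_args[1], so a commutative operation like SUM can reuse the forward converter unchanged, while a non-commutative operation like division needs the operand swap. A self-contained illustration of the dispatch order (plain Python, no torch2trt names):

    class T(object):
        def __add__(self, other):
            return ('add', other)
        def __radd__(self, other):
            # reached only when the left operand's __add__ returns NotImplemented
            return ('radd', other)

    assert (T() + 1) == ('add', 1)
    assert (1 + T()) == ('radd', 1)

This is why the two patches below first split out a dedicated convert_radd with flipped operands and then fold it back into the shared add/mul converters, keeping a separate convert_rdiv (with swapped operands) only for true division.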
Mon Sep 17 00:00:00 2001 From: John Welsh Date: Mon, 30 Sep 2019 21:36:47 -0700 Subject: [PATCH 188/355] added rdiv and radd --- torch2trt/converters/add.py | 11 ++++++++++- torch2trt/converters/div.py | 38 ++++++++++++++++++++++++++++++++++++- 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/torch2trt/converters/add.py b/torch2trt/converters/add.py index 1392fbab..1a743e7a 100644 --- a/torch2trt/converters/add.py +++ b/torch2trt/converters/add.py @@ -5,7 +5,6 @@ @tensorrt_converter('torch.add') @tensorrt_converter('torch.Tensor.__iadd__') @tensorrt_converter('torch.Tensor.__add__') -@tensorrt_converter('torch.Tensor.__radd__') def convert_add(ctx): input_a = ctx.method_args[0] input_b = ctx.method_args[1] @@ -14,6 +13,16 @@ def convert_add(ctx): layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.SUM) output._trt = layer.get_output(0) + +@tensorrt_converter('torch.Tensor.__radd__') +def convert_radd(ctx): + input_a = ctx.method_args[1] # flipped for radd + input_b = ctx.method_args[0] + input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b) + output = ctx.method_return + layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.SUM) + output._trt = layer.get_output(0) + class Add(torch.nn.Module): def __init__(self): diff --git a/torch2trt/converters/div.py b/torch2trt/converters/div.py index 73f4c5f6..c6df7c18 100644 --- a/torch2trt/converters/div.py +++ b/torch2trt/converters/div.py @@ -14,6 +14,16 @@ def convert_div(ctx): output._trt = layer.get_output(0) +@tensorrt_converter('torch.Tensor.__rtruediv__') +def convert_rdiv(ctx): + input_a = ctx.method_args[1] + input_b = ctx.method_args[0] + input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b) + output = ctx.method_return + layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.DIV) + output._trt = layer.get_output(0) + + class Div(torch.nn.Module): def __init__(self): super(Div, self).__init__() @@ -51,4 +61,30 @@ def forward(self, x, y): @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) def test_div_torchdiv(): - return TorchDiv() \ No newline at end of file + return TorchDiv() + + +class RDivInt(torch.nn.Module): + def __init__(self): + super(RDivInt, self).__init__() + + def forward(self, x): + return 100 / x + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +def test_add_rdiv_int(): + return RDivInt() + + +class RDivFloat(torch.nn.Module): + def __init__(self): + super(RDivFloat, self).__init__() + + def forward(self, x): + return 100.0 / x + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +def test_add_rdiv_float(): + return RDivFloat() \ No newline at end of file From 3d2fef8bc1ac3fbc99ff02457870fe5791f71cdf Mon Sep 17 00:00:00 2001 From: John Welsh Date: Mon, 30 Sep 2019 21:41:29 -0700 Subject: [PATCH 189/355] added rdiv, rmul, radd --- torch2trt/converters/add.py | 11 +---------- torch2trt/converters/div.py | 6 +++--- torch2trt/converters/mul.py | 29 ++++++++++++++++++++++++++++- 3 files changed, 32 insertions(+), 14 deletions(-) diff --git a/torch2trt/converters/add.py b/torch2trt/converters/add.py index 1a743e7a..0ca9e095 100644 --- a/torch2trt/converters/add.py +++ b/torch2trt/converters/add.py @@ -5,6 +5,7 @@ @tensorrt_converter('torch.add') @tensorrt_converter('torch.Tensor.__iadd__') @tensorrt_converter('torch.Tensor.__add__') +@tensorrt_converter('torch.Tensor.__radd__') def convert_add(ctx): input_a = 
ctx.method_args[0] input_b = ctx.method_args[1] @@ -12,16 +13,6 @@ def convert_add(ctx): output = ctx.method_return layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.SUM) output._trt = layer.get_output(0) - - -@tensorrt_converter('torch.Tensor.__radd__') -def convert_radd(ctx): - input_a = ctx.method_args[1] # flipped for radd - input_b = ctx.method_args[0] - input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b) - output = ctx.method_return - layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.SUM) - output._trt = layer.get_output(0) class Add(torch.nn.Module): diff --git a/torch2trt/converters/div.py b/torch2trt/converters/div.py index c6df7c18..df5cac18 100644 --- a/torch2trt/converters/div.py +++ b/torch2trt/converters/div.py @@ -16,7 +16,7 @@ def convert_div(ctx): @tensorrt_converter('torch.Tensor.__rtruediv__') def convert_rdiv(ctx): - input_a = ctx.method_args[1] + input_a = ctx.method_args[1] # inputs switched for rdiv input_b = ctx.method_args[0] input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b) output = ctx.method_return @@ -73,7 +73,7 @@ def forward(self, x): @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) -def test_add_rdiv_int(): +def test_rdiv_int(): return RDivInt() @@ -86,5 +86,5 @@ def forward(self, x): @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) -def test_add_rdiv_float(): +def test_rdiv_float(): return RDivFloat() \ No newline at end of file diff --git a/torch2trt/converters/mul.py b/torch2trt/converters/mul.py index 087a2ecb..e3b2f991 100644 --- a/torch2trt/converters/mul.py +++ b/torch2trt/converters/mul.py @@ -5,6 +5,7 @@ @tensorrt_converter('torch.mul') @tensorrt_converter('torch.Tensor.__imul__') @tensorrt_converter('torch.Tensor.__mul__') +@tensorrt_converter('torch.Tensor.__rmul__') def convert_mul(ctx): input_a = ctx.method_args[0] input_b = ctx.method_args[1] @@ -50,4 +51,30 @@ def forward(self, x, y): @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) def test_mul_torchmul(): - return TorchMul() \ No newline at end of file + return TorchMul() + + +class RMulInt(torch.nn.Module): + def __init__(self): + super(RMulInt, self).__init__() + + def forward(self, x): + return 10 * x + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +def test_rmul_int(): + return RMulInt() + + +class RMulFloat(torch.nn.Module): + def __init__(self): + super(RMulFloat, self).__init__() + + def forward(self, x): + return 10.0 * x + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +def test_rmul_float(): + return RMulFloat() \ No newline at end of file From e69c577eae5a22da14e8a1a03fbb258b32cf30ec Mon Sep 17 00:00:00 2001 From: John Welsh Date: Mon, 30 Sep 2019 21:59:15 -0700 Subject: [PATCH 190/355] replcaed input._trt with trt_ in interpolate --- torch2trt/converters/interpolate/interpolate.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/torch2trt/converters/interpolate/interpolate.py b/torch2trt/converters/interpolate/interpolate.py index cdf763b7..691f0c3c 100644 --- a/torch2trt/converters/interpolate/interpolate.py +++ b/torch2trt/converters/interpolate/interpolate.py @@ -16,6 +16,7 @@ def get_interpolate_plugin(size, mode, align_corners): @tensorrt_converter('torch.nn.functional.interpolate') def convert_interpolate(ctx): input = ctx.method_args[0] + input_trt = trt_(ctx.network, input) output = ctx.method_return try: @@ -33,7 +34,7 
ctx.method_args[0]
     input_b = ctx.method_args[1]
@@ -12,16 +13,6 @@ def convert_add(ctx):
     output = ctx.method_return
     layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.SUM)
     output._trt = layer.get_output(0)
-
-
-@tensorrt_converter('torch.Tensor.__radd__')
-def convert_radd(ctx):
-    input_a = ctx.method_args[1]  # flipped for radd
-    input_b = ctx.method_args[0]
-    input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b)
-    output = ctx.method_return
-    layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.SUM)
-    output._trt = layer.get_output(0)
 
 
 class Add(torch.nn.Module):

diff --git a/torch2trt/converters/div.py b/torch2trt/converters/div.py
index c6df7c18..df5cac18 100644
--- a/torch2trt/converters/div.py
+++ b/torch2trt/converters/div.py
@@ -16,7 +16,7 @@ def convert_div(ctx):
 
 @tensorrt_converter('torch.Tensor.__rtruediv__')
 def convert_rdiv(ctx):
-    input_a = ctx.method_args[1]
+    input_a = ctx.method_args[1]  # inputs switched for rdiv
     input_b = ctx.method_args[0]
     input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b)
     output = ctx.method_return
@@ -73,7 +73,7 @@ def forward(self, x):
 
 
 @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
-def test_add_rdiv_int():
+def test_rdiv_int():
     return RDivInt()
 
 
@@ -86,5 +86,5 @@ def forward(self, x):
 
 
 @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
-def test_add_rdiv_float():
+def test_rdiv_float():
     return RDivFloat()
\ No newline at end of file

diff --git a/torch2trt/converters/mul.py b/torch2trt/converters/mul.py
index 087a2ecb..e3b2f991 100644
--- a/torch2trt/converters/mul.py
+++ b/torch2trt/converters/mul.py
@@ -5,6 +5,7 @@
 @tensorrt_converter('torch.mul')
 @tensorrt_converter('torch.Tensor.__imul__')
 @tensorrt_converter('torch.Tensor.__mul__')
+@tensorrt_converter('torch.Tensor.__rmul__')
 def convert_mul(ctx):
     input_a = ctx.method_args[0]
     input_b = ctx.method_args[1]
@@ -50,4 +51,30 @@ def forward(self, x, y):
 
 @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)])
 def test_mul_torchmul():
-    return TorchMul()
\ No newline at end of file
+    return TorchMul()
+
+
+class RMulInt(torch.nn.Module):
+    def __init__(self):
+        super(RMulInt, self).__init__()
+
+    def forward(self, x):
+        return 10 * x
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
+def test_rmul_int():
+    return RMulInt()
+
+
+class RMulFloat(torch.nn.Module):
+    def __init__(self):
+        super(RMulFloat, self).__init__()
+
+    def forward(self, x):
+        return 10.0 * x
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
+def test_rmul_float():
+    return RMulFloat()
\ No newline at end of file

From e69c577eae5a22da14e8a1a03fbb258b32cf30ec Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Mon, 30 Sep 2019 21:59:15 -0700
Subject: [PATCH 190/355] replaced input._trt with trt_ in interpolate

---
 torch2trt/converters/interpolate/interpolate.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/torch2trt/converters/interpolate/interpolate.py b/torch2trt/converters/interpolate/interpolate.py
index cdf763b7..691f0c3c 100644
--- a/torch2trt/converters/interpolate/interpolate.py
+++ b/torch2trt/converters/interpolate/interpolate.py
@@ -16,6 +16,7 @@ def get_interpolate_plugin(size, mode, align_corners):
 
 @tensorrt_converter('torch.nn.functional.interpolate')
 def convert_interpolate(ctx):
     input = ctx.method_args[0]
+    input_trt = trt_(ctx.network, input)
     output = ctx.method_return
 
     try:
@@ -33,7 +34,7
@@ def convert_interpolate(ctx): plugin = get_interpolate_plugin(size=size, mode=mode, align_corners=align_corners) - layer = ctx.network.add_plugin_v2([input._trt], plugin) + layer = ctx.network.add_plugin_v2([input_trt], plugin) output._trt = layer.get_output(0) From f27aa2790cfebaa76fb5a018737176392eb13140 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 1 Oct 2019 14:14:34 -0700 Subject: [PATCH 191/355] added mnasnet to test --- torch2trt/module_test.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/torch2trt/module_test.py b/torch2trt/module_test.py index 2df0da61..63a1cc2a 100644 --- a/torch2trt/module_test.py +++ b/torch2trt/module_test.py @@ -40,6 +40,10 @@ def module_name(self): ModuleTest(torchvision.models.shufflenet_v2_x1_0, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), ModuleTest(torchvision.models.shufflenet_v2_x1_5, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), ModuleTest(torchvision.models.shufflenet_v2_x2_0, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.mnasnet0_5, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.mnasnet0_75, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.mnasnet1_0, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), + ModuleTest(torchvision.models.mnasnet1_3, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), ] From f29e1c9446d025c4003e5d93da7f0053bb5ab17d Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 1 Oct 2019 17:54:47 -0700 Subject: [PATCH 192/355] refactored tests --- test.sh | 42 ++--- torch2trt/module_test.py | 27 +--- torch2trt/test.py | 5 + torch2trt/tests/__init__.py | 0 torch2trt/tests/torchvision/__init__.py | 0 torch2trt/tests/torchvision/classification.py | 148 ++++++++++++++++++ torch2trt/tests/torchvision/segmentation.py | 39 +++++ 7 files changed, 214 insertions(+), 47 deletions(-) create mode 100644 torch2trt/tests/__init__.py create mode 100644 torch2trt/tests/torchvision/__init__.py create mode 100644 torch2trt/tests/torchvision/classification.py create mode 100644 torch2trt/tests/torchvision/segmentation.py diff --git a/test.sh b/test.sh index 623004d7..3bf4a4d8 100755 --- a/test.sh +++ b/test.sh @@ -7,24 +7,24 @@ touch $OUTPUT_FILE echo "| Name | Data Type | Input Shapes | torch2trt kwargs | Max Error | Throughput (PyTorch) | Throughput (TensorRT) | Latency (PyTorch) | Latency (TensorRT) |" >> $OUTPUT_FILE echo "|------|-----------|--------------|------------------|-----------|----------------------|-----------------------|-------------------|--------------------|" >> $OUTPUT_FILE -python3 -m torch2trt.test -o $OUTPUT_FILE --name alexnet -python3 -m torch2trt.test -o $OUTPUT_FILE --name squeezenet1_0 -python3 -m torch2trt.test -o $OUTPUT_FILE --name squeezenet1_1 -python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet18 -python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet34 -python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet50 -python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet101 -python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet152 -python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet121 -python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet169 -python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet201 -python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet161 -python3 -m torch2trt.test -o $OUTPUT_FILE 
--name vgg11$ -python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg13$ -python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg16$ -python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg19$ -python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg11_bn -python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg13_bn -python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg16_bn -python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg19_bn -python3 -m torch2trt.test -o $OUTPUT_FILE --name mobilenet_v2 +python3 -m torch2trt.test -o $OUTPUT_FILE --name alexnet --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name squeezenet1_0 --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name squeezenet1_1 --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet18 --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet34 --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet50 --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet101 --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet152 --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet121 --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet169 --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet201 --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet161 --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg11$ --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg13$ --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg16$ --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg19$ --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg11_bn --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg13_bn --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg16_bn --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg19_bn --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name mobilenet_v2 --include=torch2trt.tests.torchvision.classification diff --git a/torch2trt/module_test.py b/torch2trt/module_test.py index 2df0da61..b590bd95 100644 --- a/torch2trt/module_test.py +++ b/torch2trt/module_test.py @@ -15,31 +15,6 @@ def module_name(self): MODULE_TESTS = [ - ModuleTest(torchvision.models.alexnet, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.squeezenet1_0, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.squeezenet1_1, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.resnet18, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - 
ModuleTest(torchvision.models.resnet34, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.resnet50, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.resnet101, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.resnet152, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.densenet121, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.densenet169, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.densenet201, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.densenet161, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.vgg11, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.vgg13, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.vgg16, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.vgg19, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.vgg11_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.vgg13_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.vgg16_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.vgg19_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.mobilenet_v2, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.shufflenet_v2_x0_5, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.shufflenet_v2_x1_0, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.shufflenet_v2_x1_5, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.shufflenet_v2_x2_0, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), ] @@ -48,4 +23,4 @@ def register_module_test(module): global MODULE_TESTS MODULE_TESTS += [ModuleTest(module, dtype, device, input_shapes, **torch2trt_kwargs)] return module - return register_module_test + return register_module_test \ No newline at end of file diff --git a/torch2trt/test.py b/torch2trt/test.py index 890e9261..cecaaca6 100644 --- a/torch2trt/test.py +++ b/torch2trt/test.py @@ -3,6 +3,7 @@ import time import argparse import re +import runpy from termcolor import colored @@ -87,7 +88,11 @@ def run(self): parser.add_argument('--output', '-o', help='Test output file path', type=str, default='torch2trt_test.md') parser.add_argument('--name', help='Regular expression to filter modules to test by name', type=str, default='.*') parser.add_argument('--tolerance', help='Maximum error to print warning for entry', type=float, default='-1') + parser.add_argument('--include', help='Addition python file to include defining additional tests', action='append') args = parser.parse_args() + + for include in args.include: + runpy.run_module(include) for test in 
MODULE_TESTS: diff --git a/torch2trt/tests/__init__.py b/torch2trt/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/torch2trt/tests/torchvision/__init__.py b/torch2trt/tests/torchvision/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/torch2trt/tests/torchvision/classification.py b/torch2trt/tests/torchvision/classification.py new file mode 100644 index 00000000..acf15447 --- /dev/null +++ b/torch2trt/tests/torchvision/classification.py @@ -0,0 +1,148 @@ +import torch +import torchvision +from torch2trt.module_test import add_module_test + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def alexnet(): + return torchvision.models.alexnet(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def squeezenet1_0(): + return torchvision.models.squeezenet1_0(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def squeezenet1_1(): + return torchvision.models.squeezenet1_1(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def resnet18(): + return torchvision.models.resnet18(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def resnet34(): + return torchvision.models.resnet34(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def resnet50(): + return torchvision.models.resnet50(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def resnet101(): + return torchvision.models.resnet101(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def resnet152(): + return torchvision.models.resnet152(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def densenet121(): + return torchvision.models.densenet121(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def densenet169(): + return torchvision.models.densenet169(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def densenet201(): + return torchvision.models.densenet201(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def densenet161(): + return torchvision.models.densenet161(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def vgg11(): + return torchvision.models.vgg11(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def vgg13(): + return torchvision.models.vgg13(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def vgg16(): + return torchvision.models.vgg16(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def vgg19(): + return torchvision.models.vgg19(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def vgg11_bn(): + return torchvision.models.vgg11_bn(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def 
vgg13_bn(): + return torchvision.models.vgg13_bn(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def vgg16_bn(): + return torchvision.models.vgg16_bn(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def vgg19_bn(): + return torchvision.models.vgg19_bn(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def mobilenet_v2(): + return torchvision.models.mobilenet_v2(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def shufflenet_v2_x0_5(): + return torchvision.models.shufflenet_v2_x0_5(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def shufflenet_v2_x1_0(): + return torchvision.models.shufflenet_v2_x1_0(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def shufflenet_v2_x1_5(): + return torchvision.models.shufflenet_v2_x1_5(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def shufflenet_v2_x2_0(): + return torchvision.models.shufflenet_v2_x2_0(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def mnasnet0_5(): + return torchvision.models.mnasnet0_5(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def mnasnet0_75(): + return torchvision.models.mnasnet0_75(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def mnasnet1_0(): + return torchvision.models.mnasnet1_0(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def mnasnet1_3(): + return torchvision.models.mnasnet1_3(pretrained=False) \ No newline at end of file diff --git a/torch2trt/tests/torchvision/segmentation.py b/torch2trt/tests/torchvision/segmentation.py new file mode 100644 index 00000000..6cc915dd --- /dev/null +++ b/torch2trt/tests/torchvision/segmentation.py @@ -0,0 +1,39 @@ +import torch +import torchvision +from torch2trt.module_test import add_module_test + + +class ModelWrapper(torch.nn.Module): + def __init__(self, model): + super(ModelWrapper, self).__init__() + self.model = model + def forward(self, x): + return self.model(x)['out'] + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def deeplabv3_resnet50(): + bb = torchvision.models.segmentation.deeplabv3_resnet50(pretrained=False) + model = ModelWrapper(bb) + return model + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def deeplabv3_resnet101(): + bb = torchvision.models.segmentation.deeplabv3_resnet101(pretrained=False) + model = ModelWrapper(bb) + return model + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def fcn_resnet50(): + bb = torchvision.models.segmentation.fcn_resnet50(pretrained=False) + model = ModelWrapper(bb) + return model + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def fcn_resnet101(): + bb = torchvision.models.segmentation.fcn_resnet101(pretrained=False) + model = ModelWrapper(bb) + return model \ No newline at end of file From 5b1d55265c5b1cf481e9ef36cc0078b090b06d76 
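The test refactor above replaces the hard-coded MODULE_TESTS list with @add_module_test-decorated factory functions that live in importable modules and are pulled in at runtime via runpy. Under that scheme, adding benchmarks no longer requires touching torch2trt itself; a test module only needs to be importable. A minimal sketch, assuming a hypothetical my_tests.py on the Python path (all names here are placeholders):

    # my_tests.py (hypothetical user module)
    import torch
    from torch2trt.module_test import add_module_test

    @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
    def my_conv_relu():
        return torch.nn.Sequential(torch.nn.Conv2d(3, 8, kernel_size=3), torch.nn.ReLU())

It would then be selected with python3 -m torch2trt.test -o out.md --name my_conv_relu --include=my_tests, mirroring the test.sh invocations above.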
Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 1 Oct 2019 17:54:47 -0700 Subject: [PATCH 193/355] refactored tests --- test.sh | 42 ++--- torch2trt/module_test.py | 31 +--- torch2trt/test.py | 5 + torch2trt/tests/__init__.py | 0 torch2trt/tests/torchvision/__init__.py | 0 torch2trt/tests/torchvision/classification.py | 148 ++++++++++++++++++ torch2trt/tests/torchvision/segmentation.py | 39 +++++ 7 files changed, 214 insertions(+), 51 deletions(-) create mode 100644 torch2trt/tests/__init__.py create mode 100644 torch2trt/tests/torchvision/__init__.py create mode 100644 torch2trt/tests/torchvision/classification.py create mode 100644 torch2trt/tests/torchvision/segmentation.py diff --git a/test.sh b/test.sh index 623004d7..3bf4a4d8 100755 --- a/test.sh +++ b/test.sh @@ -7,24 +7,24 @@ touch $OUTPUT_FILE echo "| Name | Data Type | Input Shapes | torch2trt kwargs | Max Error | Throughput (PyTorch) | Throughput (TensorRT) | Latency (PyTorch) | Latency (TensorRT) |" >> $OUTPUT_FILE echo "|------|-----------|--------------|------------------|-----------|----------------------|-----------------------|-------------------|--------------------|" >> $OUTPUT_FILE -python3 -m torch2trt.test -o $OUTPUT_FILE --name alexnet -python3 -m torch2trt.test -o $OUTPUT_FILE --name squeezenet1_0 -python3 -m torch2trt.test -o $OUTPUT_FILE --name squeezenet1_1 -python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet18 -python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet34 -python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet50 -python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet101 -python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet152 -python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet121 -python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet169 -python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet201 -python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet161 -python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg11$ -python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg13$ -python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg16$ -python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg19$ -python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg11_bn -python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg13_bn -python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg16_bn -python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg19_bn -python3 -m torch2trt.test -o $OUTPUT_FILE --name mobilenet_v2 +python3 -m torch2trt.test -o $OUTPUT_FILE --name alexnet --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name squeezenet1_0 --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name squeezenet1_1 --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet18 --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet34 --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet50 --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet101 --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name resnet152 --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet121 --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet169 
--include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet201 --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name densenet161 --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg11$ --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg13$ --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg16$ --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg19$ --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg11_bn --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg13_bn --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg16_bn --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name vgg19_bn --include=torch2trt.tests.torchvision.classification +python3 -m torch2trt.test -o $OUTPUT_FILE --name mobilenet_v2 --include=torch2trt.tests.torchvision.classification diff --git a/torch2trt/module_test.py b/torch2trt/module_test.py index 63a1cc2a..b590bd95 100644 --- a/torch2trt/module_test.py +++ b/torch2trt/module_test.py @@ -15,35 +15,6 @@ def module_name(self): MODULE_TESTS = [ - ModuleTest(torchvision.models.alexnet, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.squeezenet1_0, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.squeezenet1_1, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.resnet18, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.resnet34, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.resnet50, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.resnet101, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.resnet152, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.densenet121, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.densenet169, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.densenet201, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.densenet161, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.vgg11, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.vgg13, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.vgg16, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.vgg19, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.vgg11_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.vgg13_bn, torch.float16, 
torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.vgg16_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.vgg19_bn, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.mobilenet_v2, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.shufflenet_v2_x0_5, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.shufflenet_v2_x1_0, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.shufflenet_v2_x1_5, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.shufflenet_v2_x2_0, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.mnasnet0_5, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.mnasnet0_75, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.mnasnet1_0, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), - ModuleTest(torchvision.models.mnasnet1_3, torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True), ] @@ -52,4 +23,4 @@ def register_module_test(module): global MODULE_TESTS MODULE_TESTS += [ModuleTest(module, dtype, device, input_shapes, **torch2trt_kwargs)] return module - return register_module_test + return register_module_test \ No newline at end of file diff --git a/torch2trt/test.py b/torch2trt/test.py index 890e9261..cecaaca6 100644 --- a/torch2trt/test.py +++ b/torch2trt/test.py @@ -3,6 +3,7 @@ import time import argparse import re +import runpy from termcolor import colored @@ -87,7 +88,11 @@ def run(self): parser.add_argument('--output', '-o', help='Test output file path', type=str, default='torch2trt_test.md') parser.add_argument('--name', help='Regular expression to filter modules to test by name', type=str, default='.*') parser.add_argument('--tolerance', help='Maximum error to print warning for entry', type=float, default='-1') + parser.add_argument('--include', help='Addition python file to include defining additional tests', action='append') args = parser.parse_args() + + for include in args.include: + runpy.run_module(include) for test in MODULE_TESTS: diff --git a/torch2trt/tests/__init__.py b/torch2trt/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/torch2trt/tests/torchvision/__init__.py b/torch2trt/tests/torchvision/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/torch2trt/tests/torchvision/classification.py b/torch2trt/tests/torchvision/classification.py new file mode 100644 index 00000000..acf15447 --- /dev/null +++ b/torch2trt/tests/torchvision/classification.py @@ -0,0 +1,148 @@ +import torch +import torchvision +from torch2trt.module_test import add_module_test + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def alexnet(): + return torchvision.models.alexnet(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def squeezenet1_0(): + return torchvision.models.squeezenet1_0(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def squeezenet1_1(): + return 
torchvision.models.squeezenet1_1(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def resnet18(): + return torchvision.models.resnet18(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def resnet34(): + return torchvision.models.resnet34(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def resnet50(): + return torchvision.models.resnet50(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def resnet101(): + return torchvision.models.resnet101(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def resnet152(): + return torchvision.models.resnet152(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def densenet121(): + return torchvision.models.densenet121(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def densenet169(): + return torchvision.models.densenet169(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def densenet201(): + return torchvision.models.densenet201(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def densenet161(): + return torchvision.models.densenet161(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def vgg11(): + return torchvision.models.vgg11(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def vgg13(): + return torchvision.models.vgg13(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def vgg16(): + return torchvision.models.vgg16(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def vgg19(): + return torchvision.models.vgg19(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def vgg11_bn(): + return torchvision.models.vgg11_bn(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def vgg13_bn(): + return torchvision.models.vgg13_bn(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def vgg16_bn(): + return torchvision.models.vgg16_bn(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def vgg19_bn(): + return torchvision.models.vgg19_bn(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def mobilenet_v2(): + return torchvision.models.mobilenet_v2(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def shufflenet_v2_x0_5(): + return torchvision.models.shufflenet_v2_x0_5(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def shufflenet_v2_x1_0(): + return torchvision.models.shufflenet_v2_x1_0(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def 
shufflenet_v2_x1_5(): + return torchvision.models.shufflenet_v2_x1_5(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def shufflenet_v2_x2_0(): + return torchvision.models.shufflenet_v2_x2_0(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def mnasnet0_5(): + return torchvision.models.mnasnet0_5(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def mnasnet0_75(): + return torchvision.models.mnasnet0_75(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def mnasnet1_0(): + return torchvision.models.mnasnet1_0(pretrained=False) + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def mnasnet1_3(): + return torchvision.models.mnasnet1_3(pretrained=False) \ No newline at end of file diff --git a/torch2trt/tests/torchvision/segmentation.py b/torch2trt/tests/torchvision/segmentation.py new file mode 100644 index 00000000..6cc915dd --- /dev/null +++ b/torch2trt/tests/torchvision/segmentation.py @@ -0,0 +1,39 @@ +import torch +import torchvision +from torch2trt.module_test import add_module_test + + +class ModelWrapper(torch.nn.Module): + def __init__(self, model): + super(ModelWrapper, self).__init__() + self.model = model + def forward(self, x): + return self.model(x)['out'] + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def deeplabv3_resnet50(): + bb = torchvision.models.segmentation.deeplabv3_resnet50(pretrained=False) + model = ModelWrapper(bb) + return model + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def deeplabv3_resnet101(): + bb = torchvision.models.segmentation.deeplabv3_resnet101(pretrained=False) + model = ModelWrapper(bb) + return model + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def fcn_resnet50(): + bb = torchvision.models.segmentation.fcn_resnet50(pretrained=False) + model = ModelWrapper(bb) + return model + + +@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True) +def fcn_resnet101(): + bb = torchvision.models.segmentation.fcn_resnet101(pretrained=False) + model = ModelWrapper(bb) + return model \ No newline at end of file From 4852b822ca409550e07e4fec41fcd12a69393720 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Wed, 2 Oct 2019 12:47:18 -0700 Subject: [PATCH 194/355] fix missing --include argument in test.py --- torch2trt/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch2trt/test.py b/torch2trt/test.py index cecaaca6..c0ec1ddc 100644 --- a/torch2trt/test.py +++ b/torch2trt/test.py @@ -88,7 +88,7 @@ def run(self): parser.add_argument('--output', '-o', help='Test output file path', type=str, default='torch2trt_test.md') parser.add_argument('--name', help='Regular expression to filter modules to test by name', type=str, default='.*') parser.add_argument('--tolerance', help='Maximum error to print warning for entry', type=float, default='-1') - parser.add_argument('--include', help='Addition python file to include defining additional tests', action='append') + parser.add_argument('--include', help='Addition python file to include defining additional tests', action='append', default=[]) args = parser.parse_args() for include in args.include: From 
7693e81c1943c025e6065df16dea87be07190512 Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Wed, 2 Oct 2019 14:27:54 -0700
Subject: [PATCH 195/355] removed copy(..) from conversion hook

---
 torch2trt/torch2trt.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py
index 46a05778..dcc688ff 100644
--- a/torch2trt/torch2trt.py
+++ b/torch2trt/torch2trt.py
@@ -211,7 +211,7 @@ class ConversionHook(object):
     def __init__(self, ctx, method, converter):
         self.ctx = ctx
         self.method_str = method
-        self.method_impl = copy(eval(method))
+        self.method_impl = eval(method)
         self.converter = converter
 
     def _set_method(self, method):

From 506fb87809cf16ff3f488de0d7e70ac11bd71328 Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Wed, 2 Oct 2019 14:42:19 -0700
Subject: [PATCH 197/355] handle __div__, __rdiv__ and __idiv__ to support Python2

---
 torch2trt/converters/div.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/torch2trt/converters/div.py b/torch2trt/converters/div.py
index df5cac18..4da1368d 100644
--- a/torch2trt/converters/div.py
+++ b/torch2trt/converters/div.py
@@ -3,8 +3,10 @@
 
 
 @tensorrt_converter('torch.div')
-@tensorrt_converter('torch.Tensor.__itruediv__')
-@tensorrt_converter('torch.Tensor.__truediv__')
+@tensorrt_converter('torch.Tensor.__div__') # py2
+@tensorrt_converter('torch.Tensor.__idiv__') # py2
+@tensorrt_converter('torch.Tensor.__truediv__') # py3
+@tensorrt_converter('torch.Tensor.__itruediv__') # py3
 def convert_div(ctx):
     input_a = ctx.method_args[0]
     input_b = ctx.method_args[1]
@@ -14,7 +16,8 @@ def convert_div(ctx):
     output._trt = layer.get_output(0)
 
 
-@tensorrt_converter('torch.Tensor.__rtruediv__')
+@tensorrt_converter('torch.Tensor.__rdiv__') # py2
+@tensorrt_converter('torch.Tensor.__rtruediv__') # py3
 def convert_rdiv(ctx):
     input_a = ctx.method_args[1]  # inputs switched for rdiv
     input_b = ctx.method_args[0]

From 6fafa46e3aa401fbb4090d74231178c7de08597b Mon Sep 17 00:00:00 2001
From: John
Date: Wed, 2 Oct 2019 15:26:54 -0700
Subject: [PATCH 198/355] removes caffe2 from build

caffe2 is not needed and breaks build when using PyTorch 1.2
---
 build.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/build.py b/build.py
index 8ab773bf..fb3bb3c5 100644
--- a/build.py
+++ b/build.py
@@ -11,7 +11,7 @@
 
 NINJA_TEMPLATE = Template((
     "rule link\n"
-    "  command = g++ -shared -o $$out $$in -L$torch_dir/lib -L$cuda_dir/lib64 -L$trt_lib_dir -lc10 -lc10_cuda -ltorch -lcudart -lcaffe2 -lcaffe2_gpu -lprotobuf -lprotobuf-lite -pthread -lpthread -lnvinfer\n"
+    "  command = g++ -shared -o $$out $$in -L$torch_dir/lib -L$cuda_dir/lib64 -L$trt_lib_dir -lc10 -lc10_cuda -ltorch -lcudart -lprotobuf -lprotobuf-lite -pthread -lpthread -lnvinfer\n"
    "rule protoc\n"
 "  command = protoc $$in --cpp_out=. 
--python_out=.\n" "rule cxx\n" From c8b3619fbd9232667542a9c5cbcf7c28f9129609 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 3 Oct 2019 20:44:37 -0700 Subject: [PATCH 199/355] added support for prelu --- torch2trt/converters/__init__.py | 1 + torch2trt/converters/prelu.py | 47 ++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 torch2trt/converters/prelu.py diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index dfc74a7f..69847aae 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -19,6 +19,7 @@ from .normalize import * from .pad import * from .permute import * +from .prelu import * from .relu import * from .ReLU import * from .relu6 import * diff --git a/torch2trt/converters/prelu.py b/torch2trt/converters/prelu.py new file mode 100644 index 00000000..c2c4ca6f --- /dev/null +++ b/torch2trt/converters/prelu.py @@ -0,0 +1,47 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +@tensorrt_converter('torch.nn.functional.prelu') +def convert_prelu(ctx): + input = get_arg(ctx, 'input', pos=0, default=None) + weight = get_arg(ctx, 'weight', pos=1, default=None) + output = ctx.method_return + + weight_shape = [1] * (len(input.shape) - 1) + weight_shape[0] = weight.numel() + + input_trt = trt_(ctx.network, input) + + + # y = prelu(x) = relu(x) - alpha * relu(-x) + weight_trt = ctx.network.add_constant(weight_shape, -weight.detach().view(weight_shape).cpu().numpy()).get_output(0) # detach so considered leaf + + # x >= 0 + a = ctx.network.add_activation(input_trt, trt.ActivationType.RELU).get_output(0) + + # x <= 0 + b = ctx.network.add_unary(input_trt, trt.UnaryOperation.NEG).get_output(0) + b = ctx.network.add_activation(b, trt.ActivationType.RELU).get_output(0) + b = ctx.network.add_elementwise(b, weight_trt, trt.ElementWiseOperation.PROD).get_output(0) + + # y = a + b + y = ctx.network.add_elementwise(a, b, trt.ElementWiseOperation.SUM) + + output._trt = y.get_output(0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3, 3)]) +def test_prelu_scalar(): + return torch.nn.PReLU() + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3, 3)]) +def test_prelu_vector(): + m = torch.nn.PReLU(5) + m.weight = torch.nn.Parameter(torch.randn(5)) # randn so each channel different + return m \ No newline at end of file From 9bbf3485c612ba63568cc2ca78b51264b4d774b5 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 3 Oct 2019 22:21:24 -0700 Subject: [PATCH 200/355] added supported unary ops --- torch2trt/converters/__init__.py | 1 + torch2trt/converters/unary.py | 280 +++++++++++++++++++++++++++++++ 2 files changed, 281 insertions(+) create mode 100644 torch2trt/converters/unary.py diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index 69847aae..bed3b3f4 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -32,6 +32,7 @@ from .softmax import * from .split import * from .chunk import * +from .unary import * try: diff --git a/torch2trt/converters/unary.py b/torch2trt/converters/unary.py new file mode 100644 index 00000000..95000957 --- /dev/null +++ b/torch2trt/converters/unary.py @@ -0,0 +1,280 @@ +from 
torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +def __convert_unary(ctx, op): + input = get_arg(ctx, 'input', pos=0, default=None) + input_trt = trt_(ctx.network, input) + output = ctx.method_return + layer = ctx.network.add_unary(input_trt, op) + output._trt = layer.get_output(0) + + +class UnaryModule(torch.nn.Module): + def __init__(self, fn): + super(UnaryModule, self).__init__() + self.fn = fn + + def forward(self, x): + return self.fn(x) + +# EXP : Exponentiation + + +@tensorrt_converter('torch.exp') +@tensorrt_converter('torch.exp_') +@tensorrt_converter('torch.Tensor.exp') +@tensorrt_converter('torch.Tensor.exp_') +def convert_exp(ctx): + __convert_unary(ctx, trt.UnaryOperation.EXP) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)]) +def test_exp(): + return UnaryModule(lambda x: torch.exp(x)) + + +# LOG : Log (base e) + + +@tensorrt_converter('torch.log') +@tensorrt_converter('torch.log_') +@tensorrt_converter('torch.Tensor.log') +@tensorrt_converter('torch.Tensor.log_') +def convert_log(ctx): + __convert_unary(ctx, trt.UnaryOperation.LOG) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)]) +def test_log(): + return UnaryModule(lambda x: torch.log(x)) + + +# SQRT : Square root + + +@tensorrt_converter('torch.sqrt') +@tensorrt_converter('torch.sqrt_') +@tensorrt_converter('torch.Tensor.sqrt') +@tensorrt_converter('torch.Tensor.sqrt_') +def convert_sqrt(ctx): + __convert_unary(ctx, trt.UnaryOperation.SQRT) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)]) +def test_sqrt(): + return UnaryModule(lambda x: torch.sqrt(x)) + + +# RECIP : Reciprocal + + +@tensorrt_converter('torch.reciprocal') +@tensorrt_converter('torch.reciprocal_') +@tensorrt_converter('torch.Tensor.reciprocal') +@tensorrt_converter('torch.Tensor.reciprocal_') +def convert_reciprocal(ctx): + __convert_unary(ctx, trt.UnaryOperation.RECIP) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)]) +def test_reciprocal(): + return UnaryModule(lambda x: torch.reciprocal(x)) + + +# ABS : Absolute value + + +@tensorrt_converter('torch.abs') +@tensorrt_converter('torch.abs_') +@tensorrt_converter('torch.Tensor.abs') +@tensorrt_converter('torch.Tensor.abs_') +def convert_abs(ctx): + __convert_unary(ctx, trt.UnaryOperation.ABS) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)]) +def test_abs(): + return UnaryModule(lambda x: torch.abs(x)) + + +# NEG : Negation + +@tensorrt_converter('torch.neg') +@tensorrt_converter('torch.neg_') +@tensorrt_converter('torch.Tensor.neg') +@tensorrt_converter('torch.Tensor.neg_') +def convert_neg(ctx): + __convert_unary(ctx, trt.UnaryOperation.NEG) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)]) +def test_neg(): + return UnaryModule(lambda x: torch.neg(x)) + + +# SIN : Sine + + +@tensorrt_converter('torch.sin') +@tensorrt_converter('torch.sin_') +@tensorrt_converter('torch.Tensor.sin') +@tensorrt_converter('torch.Tensor.sin_') +def convert_sin(ctx): + __convert_unary(ctx, trt.UnaryOperation.SIN) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)]) +def test_sin(): + return UnaryModule(lambda x: torch.sin(x)) + + +# COS : Cosine + + +@tensorrt_converter('torch.cos') +@tensorrt_converter('torch.cos_') +@tensorrt_converter('torch.Tensor.cos') +@tensorrt_converter('torch.Tensor.cos_') +def convert_cos(ctx): + __convert_unary(ctx, trt.UnaryOperation.COS) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)]) 
+def test_cos():
+    return UnaryModule(lambda x: torch.cos(x))
+
+
+# TAN : Tangent
+
+
+@tensorrt_converter('torch.tan')
+@tensorrt_converter('torch.tan_')
+@tensorrt_converter('torch.Tensor.tan')
+@tensorrt_converter('torch.Tensor.tan_')
+def convert_tan(ctx):
+    __convert_unary(ctx, trt.UnaryOperation.TAN)
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)])
+def test_tan():
+    return UnaryModule(lambda x: torch.tan(x))
+
+
+# SINH : Hyperbolic sine
+
+
+@tensorrt_converter('torch.sinh')
+@tensorrt_converter('torch.sinh_')
+@tensorrt_converter('torch.Tensor.sinh')
+@tensorrt_converter('torch.Tensor.sinh_')
+def convert_sinh(ctx):
+    __convert_unary(ctx, trt.UnaryOperation.SINH)
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)])
+def test_sinh():
+    return UnaryModule(lambda x: torch.sinh(x))
+
+
+# COSH : Hyperbolic cosine
+
+
+@tensorrt_converter('torch.cosh')
+@tensorrt_converter('torch.cosh_')
+@tensorrt_converter('torch.Tensor.cosh')
+@tensorrt_converter('torch.Tensor.cosh_')
+def convert_cosh(ctx):
+    __convert_unary(ctx, trt.UnaryOperation.COSH)
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)])
+def test_cosh():
+    return UnaryModule(lambda x: torch.cosh(x))
+
+
+# ASIN : Inverse sine
+
+
+@tensorrt_converter('torch.asin')
+@tensorrt_converter('torch.asin_')
+@tensorrt_converter('torch.Tensor.asin')
+@tensorrt_converter('torch.Tensor.asin_')
+def convert_asin(ctx):
+    __convert_unary(ctx, trt.UnaryOperation.ASIN)
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)])
+def test_asin():
+    return UnaryModule(lambda x: torch.asin(x))
+
+
+# ACOS : Inverse cosine
+
+
+@tensorrt_converter('torch.acos')
+@tensorrt_converter('torch.acos_')
+@tensorrt_converter('torch.Tensor.acos')
+@tensorrt_converter('torch.Tensor.acos_')
+def convert_acos(ctx):
+    __convert_unary(ctx, trt.UnaryOperation.ACOS)
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)])
+def test_acos():
+    return UnaryModule(lambda x: torch.acos(x))
+
+
+# ATAN : Inverse tangent
+
+
+@tensorrt_converter('torch.atan')
+@tensorrt_converter('torch.atan_')
+@tensorrt_converter('torch.Tensor.atan')
+@tensorrt_converter('torch.Tensor.atan_')
+def convert_atan(ctx):
+    __convert_unary(ctx, trt.UnaryOperation.ATAN)
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)])
+def test_atan():
+    return UnaryModule(lambda x: torch.atan(x))
+
+
+# ASINH : Inverse hyperbolic sine
+# ACOSH : Inverse hyperbolic cosine
+# ATANH : Inverse hyperbolic tangent
+
+# CEIL : Ceiling
+
+
+@tensorrt_converter('torch.ceil')
+@tensorrt_converter('torch.ceil_')
+@tensorrt_converter('torch.Tensor.ceil')
+@tensorrt_converter('torch.Tensor.ceil_')
+def convert_ceil(ctx):
+    __convert_unary(ctx, trt.UnaryOperation.CEIL)
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)])
+def test_ceil():
+    return UnaryModule(lambda x: torch.ceil(x))
+
+
+# FLOOR : Floor
+
+
+@tensorrt_converter('torch.floor')
+@tensorrt_converter('torch.floor_')
+@tensorrt_converter('torch.Tensor.floor')
+@tensorrt_converter('torch.Tensor.floor_')
+def convert_floor(ctx):
+    __convert_unary(ctx, trt.UnaryOperation.FLOOR)
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)])
+def test_floor():
+    return UnaryModule(lambda x: torch.floor(x))
\ No newline at end of file
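Any converter from this set can also be checked one-off, outside the test harness (a minimal sketch against the torch2trt API; the shapes are illustrative):

    import torch
    from torch2trt import torch2trt
    from torch2trt.converters.unary import UnaryModule

    model = UnaryModule(lambda x: torch.exp(x)).cuda().eval()
    x = torch.randn(1, 5, 3).cuda()
    model_trt = torch2trt(model, [x])
    print(torch.max(torch.abs(model(x) - model_trt(x))))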
From 1c87da6ce3afe87bd2f5b6d537b323f7a3222b3d Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Thu, 3 Oct 2019 23:18:07 -0700
Subject: [PATCH 201/355] added max reduce/elementwise

---
 torch2trt/converters/__init__.py |  1 +
 torch2trt/converters/max.py      | 61 ++++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+)
 create mode 100644 torch2trt/converters/max.py

diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py
index bed3b3f4..109f5c43 100644
--- a/torch2trt/converters/__init__.py
+++ b/torch2trt/converters/__init__.py
@@ -16,6 +16,7 @@
 from .Linear import *
 from .LogSoftmax import *
 from .max_pool2d import *
+from .max import *
 from .normalize import *
 from .pad import *
 from .permute import *
diff --git a/torch2trt/converters/max.py b/torch2trt/converters/max.py
new file mode 100644
index 00000000..a95a6b8e
--- /dev/null
+++ b/torch2trt/converters/max.py
@@ -0,0 +1,61 @@
+from torch2trt.torch2trt import *
+from torch2trt.module_test import add_module_test
+from .unary import UnaryModule
+
+
+def __convert_max_elementwise(ctx):
+    input_a = ctx.method_args[0]
+    input_b = ctx.method_args[1]
+    input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b)
+    output = ctx.method_return
+    layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.MAX)
+    output._trt = layer.get_output(0)
+
+
+def __convert_max_reduce(ctx):
+    input = ctx.method_args[0]
+    dim = get_arg(ctx, 'dim', pos=1, default=tuple(range(input.ndim)))
+    keepdim = get_arg(ctx, 'keepdim', pos=2, default=False)
+    input_trt= trt_(ctx.network, input)
+    output_val = ctx.method_return[0]
+    output_idx = ctx.method_return[1]
+    layer = ctx.network.add_reduce(input_trt, trt.ReduceOperation.MAX, torch_dim_to_trt_axes(dim), keepdim)
+    output_val._trt = layer.get_output(0)
+
+
+@tensorrt_converter('torch.max')
+@tensorrt_converter('torch.Tensor.max')
+def convert_max(ctx):
+    if len(ctx.method_args) > 1 and isinstance(ctx.method_args[1], torch.Tensor):
+        __convert_max_elementwise(ctx)
+    else:
+        __convert_max_reduce(ctx)
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
+def test_max_reduce_dim1():
+    return UnaryModule(lambda x: torch.max(x, 1)[0])
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
+def test_max_reduce_dim2():
+    return UnaryModule(lambda x: torch.max(x, 2)[0])
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
+def test_max_reduce_dim1_keepdim():
+    return UnaryModule(lambda x: torch.max(x, 1, keepdim=True)[0])
+
+
+class MaxElementwise(torch.nn.Module):
+    def forward(self, x, y):
+        return torch.max(x, y)
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3), (1, 3, 3)])
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3), (1,)]) # broadcast
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3), (1, 3, 3)]) # broadcast
+def test_max_elementwise():
+    return MaxElementwise()
\ No newline at end of file
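Note that only the values output of the reduce form is wired up: output_idx is read from ctx.method_return but never assigned a _trt tensor, so a graph that consumes the indices of torch.max(x, dim) (and likewise torch.min below) will not convert.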
From 86feae598a29f304a1d41ff483282d50a35fce1f Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Thu, 3 Oct 2019 23:29:13 -0700
Subject: [PATCH 202/355] added min reduce/elementwise

---
 torch2trt/converters/__init__.py |  1 +
 torch2trt/converters/min.py      | 61 ++++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+)
 create mode 100644 torch2trt/converters/min.py

diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py
index 109f5c43..be9ab8b1 100644
--- a/torch2trt/converters/__init__.py
+++ b/torch2trt/converters/__init__.py
@@ -17,6 +17,7 @@
 from .LogSoftmax import *
 from .max_pool2d import *
 from .max import *
+from .min import *
 from .normalize import *
 from .pad import *
 from .permute import *
diff --git a/torch2trt/converters/min.py b/torch2trt/converters/min.py
new file mode 100644
index 00000000..f42a34d3
--- /dev/null
+++ b/torch2trt/converters/min.py
@@ -0,0 +1,61 @@
+from torch2trt.torch2trt import *
+from torch2trt.module_test import add_module_test
+from .unary import UnaryModule
+
+
+def __convert_min_elementwise(ctx):
+    input_a = ctx.method_args[0]
+    input_b = ctx.method_args[1]
+    input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b)
+    output = ctx.method_return
+    layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.MIN)
+    output._trt = layer.get_output(0)
+
+
+def __convert_min_reduce(ctx):
+    input = ctx.method_args[0]
+    dim = get_arg(ctx, 'dim', pos=1, default=tuple(range(input.ndim)))
+    keepdim = get_arg(ctx, 'keepdim', pos=2, default=False)
+    input_trt= trt_(ctx.network, input)
+    output_val = ctx.method_return[0]
+    output_idx = ctx.method_return[1]
+    layer = ctx.network.add_reduce(input_trt, trt.ReduceOperation.MIN, torch_dim_to_trt_axes(dim), keepdim)
+    output_val._trt = layer.get_output(0)
+
+
+@tensorrt_converter('torch.min')
+@tensorrt_converter('torch.Tensor.min')
+def convert_min(ctx):
+    if len(ctx.method_args) > 1 and isinstance(ctx.method_args[1], torch.Tensor):
+        __convert_min_elementwise(ctx)
+    else:
+        __convert_min_reduce(ctx)
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
+def test_min_reduce_dim1():
+    return UnaryModule(lambda x: torch.min(x, 1)[0])
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
+def test_min_reduce_dim2():
+    return UnaryModule(lambda x: torch.min(x, 2)[0])
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
+def test_min_reduce_dim1_keepdim():
+    return UnaryModule(lambda x: torch.min(x, 1, keepdim=True)[0])
+
+
+class MinElementwise(torch.nn.Module):
+    def forward(self, x, y):
+        return torch.min(x, y)
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3), (1, 3, 3)])
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3), (1,)]) # broadcast
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3), (1, 3, 3)]) # broadcast
+def test_min_elementwise():
+    return MinElementwise()
\ No newline at end of file
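The elementwise path goes through trt_'s broadcasting, so mismatched ranks work, e.g. (a sketch with illustrative shapes, mirroring the registered broadcast tests):

    import torch
    from torch2trt import torch2trt
    from torch2trt.converters.min import MinElementwise

    model = MinElementwise().cuda().eval()
    x = torch.randn(1, 3, 3, 3).cuda()
    y = torch.randn(1, 3, 3).cuda()
    model_trt = torch2trt(model, [x, y])
    print(torch.max(torch.abs(model(x, y) - model_trt(x, y))))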
From dfc74832c75a6f5b4676cc9d9ababa91b637fd7d Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Thu, 3 Oct 2019 23:37:33 -0700
Subject: [PATCH 203/355] added sub

---
 torch2trt/converters/__init__.py |  1 +
 torch2trt/converters/sub.py      | 89 ++++++++++++++++++++++++++++++++
 2 files changed, 90 insertions(+)
 create mode 100644 torch2trt/converters/sub.py

diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py
index be9ab8b1..60ed34b1 100644
--- a/torch2trt/converters/__init__.py
+++ b/torch2trt/converters/__init__.py
@@ -27,6 +27,7 @@
 from .relu6 import *
 from .ReLU6 import *
 from .sigmoid import *
+from .sub import *
 from .view import *
 from .tanh import *
 from .transpose import *
diff --git a/torch2trt/converters/sub.py b/torch2trt/converters/sub.py
new file mode 100644
index 00000000..848fd1aa
--- /dev/null
+++ b/torch2trt/converters/sub.py
@@ -0,0 +1,89 @@
+from torch2trt.torch2trt import *
+from torch2trt.module_test import add_module_test
+
+
+@tensorrt_converter('torch.sub')
+@tensorrt_converter('torch.Tensor.__isub__')
+@tensorrt_converter('torch.Tensor.__sub__')
+def convert_sub(ctx):
+    input_a = ctx.method_args[0]
+    input_b = ctx.method_args[1]
+    input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b)
+    output = ctx.method_return
+    layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.SUB)
+    output._trt = layer.get_output(0)
+
+
+@tensorrt_converter('torch.Tensor.__rsub__')
+def convert_rsub(ctx):
+    input_a = ctx.method_args[1]
+    input_b = ctx.method_args[0]  # flipped for rsub
+    input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b)
+    output = ctx.method_return
+    layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.SUB)
+    output._trt = layer.get_output(0)
+
+
+class Sub(torch.nn.Module):
+    def __init__(self):
+        super(Sub, self).__init__()
+
+    def forward(self, x, y):
+        return x - y
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)])
+def test_sub_basic():
+    return Sub()
+
+
+class ISub(torch.nn.Module):
+    def __init__(self):
+        super(ISub, self).__init__()
+
+    def forward(self, x, y):
+        x -= y
+        return x
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)])
+def test_sub_isub():
+    return ISub()
+
+
+class TorchSub(torch.nn.Module):
+    def __init__(self):
+        super(TorchSub, self).__init__()
+
+    def forward(self, x, y):
+        return torch.sub(x, y)
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)])
+def test_torch_sub():
+    return TorchSub()
+
+
+class RSubInt(torch.nn.Module):
+    def __init__(self):
+        super(RSubInt, self).__init__()
+
+    def forward(self, x):
+        return 1 - x
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
+def test_rsub_int():
+    return RSubInt()
+
+
+class RSubFloat(torch.nn.Module):
+    def __init__(self):
+        super(RSubFloat, self).__init__()
+
+    def forward(self, x):
+        return 1.0 - x
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
+def test_rsub_float():
+    return RSubFloat()
\ No newline at end of file
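In the reflected cases above (1 - x, 1.0 - x), the Python scalar reaches trt_ as a plain number, where it is materialized as a broadcastable TensorRT constant, so the converter itself needs no special handling for scalars.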
From 601b94e990d125e2b3bf9412fd1b877743ea6514 Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Thu, 3 Oct 2019 23:46:28 -0700
Subject: [PATCH 204/355] added pow elementwise

---
 torch2trt/converters/__init__.py |  1 +
 torch2trt/converters/pow.py      | 90 ++++++++++++++++++++++++++++++++
 2 files changed, 91 insertions(+)
 create mode 100644 torch2trt/converters/pow.py

diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py
index 60ed34b1..0e824cfa 100644
--- a/torch2trt/converters/__init__.py
+++ b/torch2trt/converters/__init__.py
@@ -21,6 +21,7 @@
 from .normalize import *
 from .pad import *
 from .permute import *
+from .pow import *
 from .prelu import *
 from .relu import *
 from .ReLU import *
diff --git a/torch2trt/converters/pow.py b/torch2trt/converters/pow.py
new file mode 100644
index 00000000..4b743b48
--- /dev/null
+++ b/torch2trt/converters/pow.py
@@ -0,0 +1,90 @@
+from torch2trt.torch2trt import *
+from torch2trt.module_test import add_module_test
+
+
+@tensorrt_converter('torch.pow')
+@tensorrt_converter('torch.Tensor.__ipow__')
+@tensorrt_converter('torch.Tensor.__pow__')
+def convert_pow(ctx):
+    input_a = ctx.method_args[0]
+    input_b = ctx.method_args[1]
+    input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b)
+    output = ctx.method_return
+    layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.POW)
+    output._trt = layer.get_output(0)
+
+
+@tensorrt_converter('torch.Tensor.__rpow__')
+def convert_rpow(ctx):
+    input_a = ctx.method_args[1]
+    input_b = ctx.method_args[0]  # flipped for rpow
+    input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b)
+    output = ctx.method_return
+    layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.POW)
+    output._trt = layer.get_output(0)
+
+
+class Pow(torch.nn.Module):
+    def __init__(self):
+        super(Pow, self).__init__()
+
+    def forward(self, x, y):
+        return x ** y
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)])
+def test_pow_basic():
+    return Pow()
+
+
+# __ipow__ not yet impl in torch
+# class IPow(torch.nn.Module):
+#     def __init__(self):
+#         super(IPow, self).__init__()
+
+#     def forward(self, x, y):
+#         x **= y
+#         return x
+
+
+# @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)])
+# def test_pow_ipow():
+#     return IPow()
+
+
+class TorchPow(torch.nn.Module):
+    def __init__(self):
+        super(TorchPow, self).__init__()
+
+    def forward(self, x, y):
+        return torch.pow(x, y)
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)])
+def test_torch_pow():
+    return TorchPow()
+
+
+class RpowInt(torch.nn.Module):
+    def __init__(self):
+        super(RpowInt, self).__init__()
+
+    def forward(self, x):
+        return 2 ** x
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
+def test_rpow_int():
+    return RpowInt()
+
+
+class RpowFloat(torch.nn.Module):
+    def __init__(self):
+        super(RpowFloat, self).__init__()
+
+    def forward(self, x):
+        return 2.0 ** x
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
+def test_rpow_float():
+    return RpowFloat()
\ No newline at end of file
From 794785548fc184010697ac01a9b1a806c06ad5b4 Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Fri, 4 Oct 2019 00:16:41 -0700
Subject: [PATCH 205/355] fixed min/max reduce all

---
 torch2trt/converters/max.py | 10 ++++++++--
 torch2trt/converters/min.py |  8 +++++++-
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/torch2trt/converters/max.py b/torch2trt/converters/max.py
index a95a6b8e..5ac4df7e 100644
--- a/torch2trt/converters/max.py
+++ b/torch2trt/converters/max.py
@@ -14,7 +14,7 @@ def __convert_max_elementwise(ctx):
 
 def __convert_max_reduce(ctx):
     input = ctx.method_args[0]
-    dim = get_arg(ctx, 'dim', pos=1, default=tuple(range(input.ndim)))
+    dim = get_arg(ctx, 'dim', pos=1, default=tuple(range(1, input.ndim)))
     keepdim = get_arg(ctx, 'keepdim', pos=2, default=False)
     input_trt= trt_(ctx.network, input)
     output_val = ctx.method_return[0]
@@ -31,7 +31,13 @@ def convert_max(ctx):
     else:
         __convert_max_reduce(ctx)
 
-
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
+def test_max_reduce_all():
+    return UnaryModule(lambda x: torch.max(x, 1)[0])
+
+
 @add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
 @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
 def test_max_reduce_dim1():
diff --git a/torch2trt/converters/min.py b/torch2trt/converters/min.py
index f42a34d3..0412567f 100644
--- a/torch2trt/converters/min.py
+++ b/torch2trt/converters/min.py
@@ -14,7 +14,7 @@ def __convert_min_elementwise(ctx):
 
 def __convert_min_reduce(ctx):
     input = ctx.method_args[0]
-    dim = get_arg(ctx, 'dim', pos=1, default=tuple(range(input.ndim)))
+    dim = get_arg(ctx, 'dim', pos=1, default=tuple(range(1, input.ndim)))
     keepdim = get_arg(ctx, 'keepdim', pos=2, default=False)
     input_trt= trt_(ctx.network, input)
     output_val = ctx.method_return[0]
@@ -32,6 +32,12 @@ def convert_min(ctx):
         __convert_min_reduce(ctx)
 
 
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
+def test_min_reduce_all():
+    return UnaryModule(lambda x: torch.min(x, 1)[0])
+
+
 @add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
 @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
 def test_min_reduce_dim1():

From 0a3eaee53ccef5ba85e1fd6a85cc49e99173a8bc Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Fri, 4 Oct 2019 00:16:57 -0700
Subject: [PATCH 206/355] added prod

---
 torch2trt/converters/__init__.py |  1 +
 torch2trt/converters/prod.py     | 38 ++++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+)
 create mode 100644 torch2trt/converters/prod.py

diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py
index 0e824cfa..0eed3024 100644
--- a/torch2trt/converters/__init__.py
+++ b/torch2trt/converters/__init__.py
@@ -23,6 +23,7 @@
 from .permute import *
 from .pow import *
 from .prelu import *
+from .prod import *
 from .relu import *
 from .ReLU import *
 from .relu6 import *
diff --git a/torch2trt/converters/prod.py b/torch2trt/converters/prod.py
new file mode 100644
index 00000000..185cdf22
--- /dev/null
+++ b/torch2trt/converters/prod.py
@@ -0,0 +1,38 @@
+from torch2trt.torch2trt import *
+from torch2trt.module_test import add_module_test
+from .unary import UnaryModule
+
+
+@tensorrt_converter('torch.prod')
+@tensorrt_converter('torch.Tensor.prod')
+def convert_prod(ctx):
+    input = ctx.method_args[0]
+    dim = get_arg(ctx, 'dim', pos=1, default=tuple(range(1, input.ndim)))
+    keepdim = get_arg(ctx, 'keepdim', pos=2, default=False)
+    input_trt= trt_(ctx.network, input)
+    output = ctx.method_return
+    layer = ctx.network.add_reduce(input_trt, trt.ReduceOperation.PROD, torch_dim_to_trt_axes(dim), keepdim)
+    output._trt = layer.get_output(0)
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
+def test_prod_reduce_all():
+    return UnaryModule(lambda x: torch.prod(x))
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
+def test_prod_reduce_dim1():
+    return UnaryModule(lambda x: torch.prod(x, 1))
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
+def test_prod_reduce_dim2():
+    return UnaryModule(lambda x: torch.prod(x, 2))
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
+def test_prod_reduce_dim1_keepdim():
+    return UnaryModule(lambda x: torch.prod(x, 1, keepdim=True))
\ No newline at end of file
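prod above, the repaired min/max reducers, and the sum converter added next all default dim to tuple(range(1, input.ndim)): dimension 0 is TensorRT's implicit batch dimension, so a full reduction can only span the per-sample dimensions.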
From 16ac4888b68c00bbeb24ea47c5420314b2d37d50 Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Fri, 4 Oct 2019 00:19:58 -0700
Subject: [PATCH 207/355] added sum reduce

---
 torch2trt/converters/__init__.py |  1 +
 torch2trt/converters/sum.py      | 38 ++++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+)
 create mode 100644 torch2trt/converters/sum.py

diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py
index 0eed3024..9c6a8f2a 100644
--- a/torch2trt/converters/__init__.py
+++ b/torch2trt/converters/__init__.py
@@ -30,6 +30,7 @@
 from .ReLU6 import *
 from .sigmoid import *
 from .sub import *
+from .sum import *
 from .view import *
 from .tanh import *
 from .transpose import *
diff --git a/torch2trt/converters/sum.py b/torch2trt/converters/sum.py
new file mode 100644
index 00000000..e3873e76
--- /dev/null
+++ b/torch2trt/converters/sum.py
@@ -0,0 +1,38 @@
+from torch2trt.torch2trt import *
+from torch2trt.module_test import add_module_test
+from .unary import UnaryModule
+
+
+@tensorrt_converter('torch.sum')
+@tensorrt_converter('torch.Tensor.sum')
+def convert_sum(ctx):
+    input = ctx.method_args[0]
+    dim = get_arg(ctx, 'dim', pos=1, default=tuple(range(1, input.ndim)))
+    keepdim = get_arg(ctx, 'keepdim', pos=2, default=False)
+    input_trt= trt_(ctx.network, input)
+    output = ctx.method_return
+    layer = ctx.network.add_reduce(input_trt, trt.ReduceOperation.SUM, torch_dim_to_trt_axes(dim), keepdim)
+    output._trt = layer.get_output(0)
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
+def test_sum_reduce_all():
+    return UnaryModule(lambda x: torch.sum(x))
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
+def test_sum_reduce_dim1():
+    return UnaryModule(lambda x: torch.sum(x, 1))
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
+def test_sum_reduce_dim2():
+    return UnaryModule(lambda x: torch.sum(x, 2))
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
+def test_sum_reduce_dim1_keepdim():
+    return UnaryModule(lambda x: torch.sum(x, 1, keepdim=True))
\ No newline at end of file
From 95a58492171139f4b0fb7547f249349e1acf9d84 Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Fri, 4 Oct 2019 00:25:47 -0700
Subject: [PATCH 208/355] increment patch

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index a2b8dded..b15ea7c5 100644
--- a/setup.py
+++ b/setup.py
@@ -115,7 +115,7 @@ def run(self):
 
 setup(
     name='torch2trt',
-    version='0.0.0',
+    version='0.0.1',
     description='An easy to use PyTorch to TensorRT converter',
     cmdclass={
         'install': InstallCommand,

From 8d0090743a1d73f77168f6276df8521378745fe7 Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Fri, 4 Oct 2019 00:31:57 -0700
Subject: [PATCH 209/355] separate tensors for test/convert to ensure falsely added constants can't pass

---
 torch2trt/test.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/torch2trt/test.py b/torch2trt/test.py
index c0ec1ddc..464ea31d 100644
--- a/torch2trt/test.py
+++ b/torch2trt/test.py
@@ -14,16 +14,20 @@ def run(self):
         module = module.type(self.dtype)
         module = module.eval()
 
-        # create inputs
+        # create inputs for conversion
+        inputs_conversion = ()
+        for shape in self.input_shapes:
+            inputs_conversion += (torch.zeros(shape).to(self.device).type(self.dtype), )
+
+        # convert module
+        module_trt = torch2trt(module, inputs_conversion, **self.torch2trt_kwargs)
+
+        # create inputs for torch/trt; 
copy of inputs to handle inplace ops inputs = () for shape in self.input_shapes: inputs += (torch.randn(shape).to(self.device).type(self.dtype), ) - - # create copy of inputs to handle inplace ops inputs_trt = tuple([tensor.clone() for tensor in inputs]) - # convert module - module_trt = torch2trt(module, inputs, **self.torch2trt_kwargs) # test output against original outputs = module(*inputs) From a6ee9c4e940760042a8cda349b4efa2fb4878da1 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Fri, 4 Oct 2019 00:39:51 -0700 Subject: [PATCH 210/355] more relu interfaces --- torch2trt/converters/relu.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/torch2trt/converters/relu.py b/torch2trt/converters/relu.py index ba3b5e0a..37f71167 100644 --- a/torch2trt/converters/relu.py +++ b/torch2trt/converters/relu.py @@ -2,7 +2,10 @@ from .ReLU import * +@tensorrt_converter('torch.relu') +@tensorrt_converter('torch.relu_') @tensorrt_converter('torch.nn.functional.relu') +@tensorrt_converter('torch.nn.functional.relu_') def convert_relu(ctx): ctx.method_args = (torch.nn.ReLU(),) + ctx.method_args convert_ReLU(ctx) \ No newline at end of file From 599524b625f1ae75139dc81017bf7f88b6bbbf83 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Fri, 4 Oct 2019 02:22:31 -0700 Subject: [PATCH 211/355] fixed broadcast shapes, removed final_shapes --- torch2trt/torch2trt.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index dcc688ff..f09bd324 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -109,7 +109,10 @@ def trt_(network, *tensors): broadcast_num_dim = 0 for t in tensors: if isinstance(t, torch.Tensor): - num_dim = len(t.shape[1:]) # exclude batch + if t.is_leaf: + num_dim = len(t.shape) # don't exclude batch for constants + else: + num_dim = len(t._trt.shape) # non-leaf tensors must already have _trt, get shape from that if num_dim > broadcast_num_dim: broadcast_num_dim = num_dim @@ -126,8 +129,8 @@ def trt_(network, *tensors): # or... add constant for leaf tensor w/o _trt elif isinstance(t, torch.Tensor) and t.is_leaf and not hasattr(t, '_trt'): # add leaf tensor - shape = tuple(t.shape[1:]) - weight = t[0].detach().cpu().numpy() + shape = tuple(t.shape) # don't exclude batch when adding constants...? 
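+            # the batch dim is kept on purpose: broadcast_num_dim above
+            # counts every dim of a constant, so the constant added here
+            # must match that rank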
+ weight = t.detach().cpu().numpy() t._trt = network.add_constant(shape, weight).get_output(0) trt_tensor = t._trt @@ -141,7 +144,7 @@ def trt_(network, *tensors): # MAKE TRT TENSOR BROADCASTABLE IF IT IS NOT ALREADY - if len(trt_tensor.shape) != broadcast_num_dim: + if len(trt_tensor.shape) < broadcast_num_dim: # append 1 size dims to front diff = broadcast_num_dim - len(trt_tensor.shape) shape = tuple([1] * diff + list(trt_tensor.shape)) @@ -274,7 +277,7 @@ def mark_outputs(self, torch_outputs, names=None): class TRTModule(torch.nn.Module): - def __init__(self, engine=None, input_names=None, output_names=None, final_shapes=None): + def __init__(self, engine=None, input_names=None, output_names=None): super(TRTModule, self).__init__() self._register_state_dict_hook(TRTModule._on_state_dict) self.engine = engine @@ -282,13 +285,11 @@ def __init__(self, engine=None, input_names=None, output_names=None, final_shape self.context = self.engine.create_execution_context() self.input_names = input_names self.output_names = output_names - self.final_shapes = final_shapes def _on_state_dict(self, state_dict, prefix, local_metadata): state_dict[prefix + 'engine'] = bytes(self.engine.serialize()) state_dict[prefix + 'input_names'] = self.input_names state_dict[prefix + 'output_names'] = self.output_names - state_dict[prefix + 'final_shapes'] = self.final_shapes def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs): engine_bytes = state_dict[prefix + 'engine'] @@ -299,7 +300,6 @@ def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, miss self.input_names = state_dict[prefix + 'input_names'] self.output_names = state_dict[prefix + 'output_names'] - self.final_shapes = state_dict[prefix + 'final_shapes'] def forward(self, *inputs): batch_size = inputs[0].shape[0] @@ -310,10 +310,7 @@ def forward(self, *inputs): for i, output_name in enumerate(self.output_names): idx = self.engine.get_binding_index(output_name) dtype = torch_dtype_from_trt(self.engine.get_binding_dtype(idx)) - if self.final_shapes is not None: - shape = (batch_size, ) + self.final_shapes[i] - else: - shape = (batch_size, ) + tuple(self.engine.get_binding_shape(idx)) + shape = (batch_size, ) + tuple(self.engine.get_binding_shape(idx)) device = torch_device_from_trt(self.engine.get_location(idx)) output = torch.empty(size=shape, dtype=dtype, device=device) outputs[i] = output @@ -336,6 +333,9 @@ def torch2trt(module, inputs, input_names=None, output_names=None, log_level=trt fp16_mode=False, max_workspace_size=0, strict_type_constraints=False): # copy inputs to avoid modifications to source data + for t in inputs: + t.requires_grad = True + inputs = [tensor.clone() for tensor in inputs] with trt.Logger(log_level) as logger, trt.Builder(logger) as builder,\ @@ -353,8 +353,6 @@ def torch2trt(module, inputs, input_names=None, output_names=None, log_level=trt outputs = (outputs, ) ctx.mark_outputs(outputs, output_names) - final_shapes = [tuple(output.shape)[1:] for output in list(outputs)] - builder.max_workspace_size = max_workspace_size builder.fp16_mode = fp16_mode builder.max_batch_size = max_batch_size @@ -362,7 +360,7 @@ def torch2trt(module, inputs, input_names=None, output_names=None, log_level=trt engine = builder.build_cuda_engine(network) - return TRTModule(engine, ctx.input_names, ctx.output_names, final_shapes) + return TRTModule(engine, ctx.input_names, ctx.output_names) # DEFINE ALL CONVERSION FUNCTIONS From 
f25eaa5dff426b61fc391347fe2631085c158ae4 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Fri, 4 Oct 2019 02:23:23 -0700 Subject: [PATCH 212/355] removed all-dim test from max/min, since this cannot be supported by tensorrt --- torch2trt/converters/max.py | 6 ------ torch2trt/converters/min.py | 6 ------ 2 files changed, 12 deletions(-) diff --git a/torch2trt/converters/max.py b/torch2trt/converters/max.py index 5ac4df7e..afcb7954 100644 --- a/torch2trt/converters/max.py +++ b/torch2trt/converters/max.py @@ -31,12 +31,6 @@ def convert_max(ctx): else: __convert_max_reduce(ctx) - -@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)]) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) -def test_max_reduce_all(): - return UnaryModule(lambda x: torch.max(x, 1)[0]) - @add_module_test(torch.float32, torch.device('cuda'), [(1, 3)]) @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) diff --git a/torch2trt/converters/min.py b/torch2trt/converters/min.py index 0412567f..109e77b3 100644 --- a/torch2trt/converters/min.py +++ b/torch2trt/converters/min.py @@ -31,12 +31,6 @@ def convert_min(ctx): else: __convert_min_reduce(ctx) - -@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)]) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) -def test_min_reduce_all(): - return UnaryModule(lambda x: torch.min(x, 1)[0]) - @add_module_test(torch.float32, torch.device('cuda'), [(1, 3)]) @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) From 59ee8349cbaf5d1e3b852035198c22445913b287 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Fri, 4 Oct 2019 02:47:29 -0700 Subject: [PATCH 213/355] removed leaf check --- torch2trt/torch2trt.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index f09bd324..c8524702 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -109,7 +109,7 @@ def trt_(network, *tensors): broadcast_num_dim = 0 for t in tensors: if isinstance(t, torch.Tensor): - if t.is_leaf: + if not hasattr(t, '_trt'): num_dim = len(t.shape) # don't exclude batch for constants else: num_dim = len(t._trt.shape) # non-leaf tensors must already have _trt, get shape from that @@ -127,7 +127,7 @@ def trt_(network, *tensors): trt_tensor = t._trt # or... add constant for leaf tensor w/o _trt - elif isinstance(t, torch.Tensor) and t.is_leaf and not hasattr(t, '_trt'): + elif isinstance(t, torch.Tensor) and not hasattr(t, '_trt'): # add leaf tensor shape = tuple(t.shape) # don't exclude batch when adding constants...? 
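+            # with the is_leaf test gone, any tensor lacking _trt lands in
+            # this branch; converter outputs always carry _trt, so only
+            # genuine constants should reach here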
weight = t.detach().cpu().numpy() @@ -140,7 +140,7 @@ def trt_(network, *tensors): scalar = t * torch.ones(shape, dtype=dtype).cpu().numpy() trt_tensor = network.add_constant(shape, scalar).get_output(0) - assert(trt_tensor is not None)#, 'TensorRT tensor could not be created') + assert(trt_tensor is not None) # MAKE TRT TENSOR BROADCASTABLE IF IT IS NOT ALREADY @@ -333,9 +333,6 @@ def torch2trt(module, inputs, input_names=None, output_names=None, log_level=trt fp16_mode=False, max_workspace_size=0, strict_type_constraints=False): # copy inputs to avoid modifications to source data - for t in inputs: - t.requires_grad = True - inputs = [tensor.clone() for tensor in inputs] with trt.Logger(log_level) as logger, trt.Builder(logger) as builder,\ From d8b43bd168e47057671665153e1577c910631ba3 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Fri, 4 Oct 2019 10:05:47 -0700 Subject: [PATCH 214/355] added jetson xavier benchmark --- benchmarks/JETSON_XAVIER.md | 49 ++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/benchmarks/JETSON_XAVIER.md b/benchmarks/JETSON_XAVIER.md index fc532d57..63ece334 100644 --- a/benchmarks/JETSON_XAVIER.md +++ b/benchmarks/JETSON_XAVIER.md @@ -1,22 +1,31 @@ | Name | Data Type | Input Shapes | torch2trt kwargs | Max Error | Throughput (PyTorch) | Throughput (TensorRT) | Latency (PyTorch) | Latency (TensorRT) | |------|-----------|--------------|------------------|-----------|----------------------|-----------------------|-------------------|--------------------| -| torchvision.models.alexnet.alexnet | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.29E-05 | 250 | 580 | 4.75 | 1.93 | -| torchvision.models.squeezenet.squeezenet1_0 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.03E-02 | 130 | 890 | 7.31 | 1.37 | -| torchvision.models.squeezenet.squeezenet1_1 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.95E-03 | 132 | 1.39e+03 | 7.41 | 0.951 | -| torchvision.models.resnet.resnet18 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 5.37E-03 | 140 | 712 | 7.1 | 1.64 | -| torchvision.models.resnet.resnet34 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.09E-01 | 79.2 | 393 | 12.6 | 2.79 | -| torchvision.models.resnet.resnet50 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 9.57E-02 | 55.5 | 312 | 17.6 | 3.48 | -| torchvision.models.resnet.resnet101 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 28.5 | 170 | 34.8 | 6.22 | -| torchvision.models.resnet.resnet152 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 18.9 | 121 | 52.1 | 8.58 | -| torchvision.models.densenet.densenet121 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.95E-03 | 23 | 168 | 43.3 | 6.37 | -| torchvision.models.densenet.densenet169 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 4.39E-03 | 16.3 | 118 | 60.2 | 8.83 | -| torchvision.models.densenet.densenet201 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 4.03E-03 | 13.3 | 90.9 | 72.7 | 11.4 | -| torchvision.models.densenet.densenet161 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.91E-03 | 17.2 | 82.4 | 56.3 | 12.6 | -| torchvision.models.vgg.vgg11 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 7.32E-04 | 85.2 | 201 | 12 | 5.16 | -| torchvision.models.vgg.vgg13 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 8.24E-04 | 71.9 | 166 | 14.2 | 6.27 | -| torchvision.models.vgg.vgg16 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.01E-03 | 61.7 | 139 | 16.6 | 7.46 | -| 
torchvision.models.vgg.vgg19 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.80E-03 | 54.1 | 121 | 18.8 | 8.52 | -| torchvision.models.vgg.vgg11_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 5.80E-04 | 81.8 | 201 | 12.5 | 5.16 | -| torchvision.models.vgg.vgg13_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 6.03E-04 | 68 | 166 | 15 | 6.27 | -| torchvision.models.vgg.vgg16_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.45E-03 | 58.5 | 140 | 17.4 | 7.41 | -| torchvision.models.vgg.vgg19_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 6.64E-04 | 51.4 | 121 | 19.8 | 8.52 | +| torch2trt.tests.torchvision.classification.alexnet | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 7.63E-05 | 251 | 565 | 4.96 | 2.02 | +| torch2trt.tests.torchvision.classification.squeezenet1_0 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 9.77E-04 | 121 | 834 | 8.04 | 1.49 | +| torch2trt.tests.torchvision.classification.squeezenet1_1 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 9.77E-04 | 125 | 1.29e+03 | 8.01 | 1.02 | +| torch2trt.tests.torchvision.classification.resnet18 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 9.77E-03 | 136 | 722 | 7.33 | 1.64 | +| torch2trt.tests.torchvision.classification.resnet34 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.50E-01 | 77.8 | 396 | 12.9 | 2.79 | +| torch2trt.tests.torchvision.classification.resnet50 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.09E-01 | 55.8 | 326 | 17.9 | 3.37 | +| torch2trt.tests.torchvision.classification.resnet101 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 28.3 | 175 | 35.1 | 6.04 | +| torch2trt.tests.torchvision.classification.resnet152 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 18.8 | 122 | 53.2 | 8.57 | +| torch2trt.tests.torchvision.classification.densenet121 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 7.81E-03 | 20.9 | 76.6 | 47.5 | 13 | +| torch2trt.tests.torchvision.classification.densenet169 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.91E-03 | 14.8 | 41.7 | 66.7 | 23.7 | +| torch2trt.tests.torchvision.classification.densenet201 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 4.88E-03 | 12.6 | 30.2 | 79.1 | 33 | +| torch2trt.tests.torchvision.classification.densenet161 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 4.88E-03 | 16.1 | 43.7 | 62.1 | 23 | +| torch2trt.tests.torchvision.classification.vgg11 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.56E-03 | 84.8 | 201 | 12.1 | 5.24 | +| torch2trt.tests.torchvision.classification.vgg13 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.24E-03 | 71.1 | 165 | 14.3 | 6.34 | +| torch2trt.tests.torchvision.classification.vgg16 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.78E-03 | 61.5 | 139 | 16.5 | 7.46 | +| torch2trt.tests.torchvision.classification.vgg19 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.81E-03 | 54.1 | 120 | 18.7 | 8.61 | +| torch2trt.tests.torchvision.classification.vgg11_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.20E-03 | 81.5 | 200 | 12.5 | 5.27 | +| torch2trt.tests.torchvision.classification.vgg13_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.71E-03 | 67.5 | 165 | 15.1 | 6.33 | +| torch2trt.tests.torchvision.classification.vgg16_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.87E-03 | 58.3 | 139 | 17.4 | 7.48 | +| torch2trt.tests.torchvision.classification.vgg19_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': 
True} | 2.44E-03 | 51.4 | 120 | 19.7 | 8.61 | +| torch2trt.tests.torchvision.classification.mobilenet_v2 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 64.8 | 723 | 15.4 | 1.67 | +| torch2trt.tests.torchvision.classification.shufflenet_v2_x0_5 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.53E-05 | 51.2 | 463 | 19.4 | 2.17 | +| torch2trt.tests.torchvision.classification.shufflenet_v2_x1_0 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.53E-05 | 49.4 | 419 | 20.4 | 2.43 | +| torch2trt.tests.torchvision.classification.shufflenet_v2_x1_5 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.53E-05 | 51.4 | 426 | 19.6 | 2.37 | +| torch2trt.tests.torchvision.classification.shufflenet_v2_x2_0 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.53E-05 | 48.2 | 419 | 20.8 | 2.48 | +| torch2trt.tests.torchvision.classification.mnasnet0_5 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.03E-06 | 67.8 | 883 | 14.9 | 1.4 | +| torch2trt.tests.torchvision.classification.mnasnet0_75 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 67.6 | 751 | 14.8 | 1.6 | +| torch2trt.tests.torchvision.classification.mnasnet1_0 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 65.7 | 667 | 15.2 | 1.77 | +| torch2trt.tests.torchvision.classification.mnasnet1_3 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 67.4 | 573 | 15 | 2.02 | \ No newline at end of file From 9e8753a15e6825e37d196ba02093a32832f8f9b9 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Fri, 4 Oct 2019 11:44:52 -0700 Subject: [PATCH 215/355] added python2 serialization fix (bytes -> bytearray) --- torch2trt/torch2trt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index c8524702..3715a072 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -287,7 +287,7 @@ def __init__(self, engine=None, input_names=None, output_names=None): self.output_names = output_names def _on_state_dict(self, state_dict, prefix, local_metadata): - state_dict[prefix + 'engine'] = bytes(self.engine.serialize()) + state_dict[prefix + 'engine'] = bytearray(self.engine.serialize()) state_dict[prefix + 'input_names'] = self.input_names state_dict[prefix + 'output_names'] = self.output_names From 713086db24091de3c0ea70780e3d96c35a9104db Mon Sep 17 00:00:00 2001 From: John Welsh Date: Fri, 4 Oct 2019 11:48:02 -0700 Subject: [PATCH 216/355] added save_load test --- torch2trt/tests/torchvision/save_load.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 torch2trt/tests/torchvision/save_load.py diff --git a/torch2trt/tests/torchvision/save_load.py b/torch2trt/tests/torchvision/save_load.py new file mode 100644 index 00000000..ee6d558a --- /dev/null +++ b/torch2trt/tests/torchvision/save_load.py @@ -0,0 +1,24 @@ + +from torch2trt.torch2trt import torch2trt, TRTModule + + +if __name__ == '__main__': + import torchvision + import torch + model = torchvision.models.resnet18(pretrained=True).cuda().eval() + data = torch.randn((1, 3, 224, 224)).cuda() + + print('Running torch2trt...') + model_trt = torch2trt(model, [data]) + + print('Saving model...') + torch.save(model_trt.state_dict(), '.test_model.pth') + + print('Loading model...') + model_trt_2 = TRTModule() + model_trt_2.load_state_dict(torch.load('.test_model.pth')) + + assert(model_trt_2.engine is not None) + + print(torch.max(torch.abs(model_trt_2(data) - model(data)))) + print(torch.max(torch.abs(model_trt_2(data) - 
model_trt(data)))) \ No newline at end of file From 2d373fed7c42b72da6d65e0692e135bd4abd753f Mon Sep 17 00:00:00 2001 From: John Welsh Date: Fri, 4 Oct 2019 12:26:24 -0700 Subject: [PATCH 217/355] added save/load test --- torch2trt/tests/torchvision/save_load.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/torch2trt/tests/torchvision/save_load.py b/torch2trt/tests/torchvision/save_load.py index ee6d558a..15af6728 100644 --- a/torch2trt/tests/torchvision/save_load.py +++ b/torch2trt/tests/torchvision/save_load.py @@ -1,15 +1,15 @@ - -from torch2trt.torch2trt import torch2trt, TRTModule +from torch2trt import * +import torchvision +import torch +from .segmentation import deeplabv3_resnet50 if __name__ == '__main__': - import torchvision - import torch - model = torchvision.models.resnet18(pretrained=True).cuda().eval() - data = torch.randn((1, 3, 224, 224)).cuda() - + model = deeplabv3_resnet50().cuda().eval().half() + data = torch.randn((1, 3, 224, 224)).cuda().half() + print('Running torch2trt...') - model_trt = torch2trt(model, [data]) + model_trt = torch2trt(model, [data], fp16_mode=True, max_workspace_size=1<<25) print('Saving model...') torch.save(model_trt.state_dict(), '.test_model.pth') From 4b5a3c2ac02c34e55b1ffb28b29dc4d037e7be29 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Fri, 4 Oct 2019 16:16:35 -0700 Subject: [PATCH 218/355] extra ignore --- .gitignore | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 03192ad5..172017dc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ .ninja_deps .ninja_log build.ninja +tags *.o *.pb.o torch2trt.egg-info @@ -10,4 +11,7 @@ __pycache__/ *.so *.pb.h *.pb.cc -*_pb2.py \ No newline at end of file +*_pb2.py +*.pyc +*.ipynb_checkpoints +*.pth From 6c403fc56a578540ca2464990fce197ba9c058fb Mon Sep 17 00:00:00 2001 From: John Welsh Date: Fri, 4 Oct 2019 16:49:52 -0700 Subject: [PATCH 219/355] added leaky_relu --- torch2trt/converters/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index 9c6a8f2a..967c2b3e 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -1,3 +1,4 @@ +from .activation import * from .adaptive_avg_pool2d import * from .AdaptiveAvgPool2d import * from .add import * From 11c465cb64395b7b36f71b7405f46a1669083462 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Fri, 4 Oct 2019 16:53:17 -0700 Subject: [PATCH 220/355] added elu --- torch2trt/converters/activation.py | 67 ++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 torch2trt/converters/activation.py diff --git a/torch2trt/converters/activation.py b/torch2trt/converters/activation.py new file mode 100644 index 00000000..99624baa --- /dev/null +++ b/torch2trt/converters/activation.py @@ -0,0 +1,67 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test +from .unary import UnaryModule + + +# | RELU : Rectified Linear activation (impl in relu.py) +# | SIGMOID : Sigmoid activation (impl in sigmoid.py) +# | TANH : Hyperbolic Tangent activation (impl in tanh.py) + + +# | LEAKY_RELU : Leaky Relu activation: f(x) = x if x >= 0, f(x) = alpha * x if x < 0 + + +@tensorrt_converter('torch.nn.functional.leaky_relu') +@tensorrt_converter('torch.nn.functional.leaky_relu_') +def convert_leaky_relu(ctx): + input = get_arg(ctx, 'input', pos=0, default=None) + negative_slope = get_arg(ctx, 'negative_slope', pos=1, default=0.01) + 
output = ctx.method_return + + input_trt = trt_(ctx.network, input) + layer = ctx.network.add_activation(input_trt, trt.ActivationType.LEAKY_RELU) + layer.alpha = negative_slope + + output._trt = layer.get_output(0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)]) +def test_leaky_relu(): + return UnaryModule(lambda x: torch.nn.functional.leaky_relu(x)) + + +# | ELU : Elu activation: f(x) = x if x >= 0, f(x) = alpha * (exp(x) - 1) if x < 0 + + +@tensorrt_converter('torch.nn.functional.elu') +@tensorrt_converter('torch.nn.functional.elu_') +def convert_elu(ctx): + input = get_arg(ctx, 'input', pos=0, default=None) + alpha = get_arg(ctx, 'alpha', pos=1, default=1.0) + output = ctx.method_return + + input_trt = trt_(ctx.network, input) + layer = ctx.network.add_activation(input_trt, trt.ActivationType.ELU) + layer.alpha = alpha + + output._trt = layer.get_output(0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)]) +def test_elu(): + return UnaryModule(lambda x: torch.nn.functional.elu(x)) + + +# | SELU : Selu activation: f(x) = beta * x if x > 0, f(x) = beta * (alpha * exp(x) - alpha) if x <= 0 +# | +# | SOFTSIGN : Softsign activation: f(x) = x / (1 + \|x\|) +# | +# | SOFTPLUS : Softplus activation: f(x) = alpha * log(exp(beta * x) + 1) +# | +# | CLIP : Clip activation: f(x) = max(alpha, min(beta, x)) (impl in clamp.py) +# | +# | HARD_SIGMOID : Hard sigmoid activation: f(x) = max(0, min(1, alpha * x + beta)) +# | +# | SCALED_TANH : Scaled Tanh activation: f(x) = alpha * tanh(beta * x) +# | +# | THRESHOLDED_RELU : Thresholded Relu activation: f(x) = x if x > alpha, f(x) = 0 if x <= alpha \ No newline at end of file From e2ebb8e00372f8e8fe312d70e760a8b4a1342ef9 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Fri, 4 Oct 2019 17:05:45 -0700 Subject: [PATCH 221/355] added selu, softsign, elu --- torch2trt/converters/activation.py | 43 ++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/torch2trt/converters/activation.py b/torch2trt/converters/activation.py index 99624baa..81fb234d 100644 --- a/torch2trt/converters/activation.py +++ b/torch2trt/converters/activation.py @@ -53,9 +53,48 @@ def test_elu(): # | SELU : Selu activation: f(x) = beta * x if x > 0, f(x) = beta * (alpha * exp(x) - alpha) if x <= 0 -# | + +@tensorrt_converter('torch.selu') +@tensorrt_converter('torch.selu_') +@tensorrt_converter('torch.nn.functional.selu') +@tensorrt_converter('torch.nn.functional.selu_') +def convert_selu(ctx): + input = get_arg(ctx, 'input', pos=0, default=None) + alpha = get_arg(ctx, 'alpha', pos=1, default=1.0) + output = ctx.method_return + + input_trt = trt_(ctx.network, input) + layer = ctx.network.add_activation(input_trt, trt.ActivationType.SELU) + layer.alpha = 1.6732632423543772848170429916717 + layer.beta = 1.0507009873554804934193349852946 + + output._trt = layer.get_output(0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)]) +def test_selu(): + return UnaryModule(lambda x: torch.nn.functional.selu(x)) + + # | SOFTSIGN : Softsign activation: f(x) = x / (1 + \|x\|) -# | + + +@tensorrt_converter('torch.nn.functional.softsign') +def convert_softsign(ctx): + input = get_arg(ctx, 'input', pos=0, default=None) + output = ctx.method_return + + input_trt = trt_(ctx.network, input) + layer = ctx.network.add_activation(input_trt, trt.ActivationType.SOFTSIGN) + + output._trt = layer.get_output(0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)]) +def test_softsign(): + return 
UnaryModule(lambda x: torch.nn.functional.softsign(x)) + + # | SOFTPLUS : Softplus activation: f(x) = alpha * log(exp(beta * x) + 1) # | # | CLIP : Clip activation: f(x) = max(alpha, min(beta, x)) (impl in clamp.py) From 9b3a3c4249ce731afe538d9446743fbffa81b467 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Fri, 4 Oct 2019 17:07:23 -0700 Subject: [PATCH 222/355] added softplus --- torch2trt/converters/activation.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/torch2trt/converters/activation.py b/torch2trt/converters/activation.py index 81fb234d..da17e94c 100644 --- a/torch2trt/converters/activation.py +++ b/torch2trt/converters/activation.py @@ -96,7 +96,24 @@ def test_softsign(): # | SOFTPLUS : Softplus activation: f(x) = alpha * log(exp(beta * x) + 1) -# | + + +@tensorrt_converter('torch.nn.functional.softplus') +def convert_softplus(ctx): + input = get_arg(ctx, 'input', pos=0, default=None) + output = ctx.method_return + + input_trt = trt_(ctx.network, input) + layer = ctx.network.add_activation(input_trt, trt.ActivationType.SOFTPLUS) + + output._trt = layer.get_output(0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)]) +def test_softplus(): + return UnaryModule(lambda x: torch.nn.functional.softplus(x)) + + # | CLIP : Clip activation: f(x) = max(alpha, min(beta, x)) (impl in clamp.py) # | # | HARD_SIGMOID : Hard sigmoid activation: f(x) = max(0, min(1, alpha * x + beta)) From 2cb97a3f0b68b58f3af771e061b7f4489863d225 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Fri, 4 Oct 2019 17:16:12 -0700 Subject: [PATCH 223/355] softplus, softsign --- torch2trt/converters/activation.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/torch2trt/converters/activation.py b/torch2trt/converters/activation.py index da17e94c..6a10f365 100644 --- a/torch2trt/converters/activation.py +++ b/torch2trt/converters/activation.py @@ -115,9 +115,7 @@ def test_softplus(): # | CLIP : Clip activation: f(x) = max(alpha, min(beta, x)) (impl in clamp.py) -# | -# | HARD_SIGMOID : Hard sigmoid activation: f(x) = max(0, min(1, alpha * x + beta)) -# | -# | SCALED_TANH : Scaled Tanh activation: f(x) = alpha * tanh(beta * x) -# | -# | THRESHOLDED_RELU : Thresholded Relu activation: f(x) = x if x > alpha, f(x) = 0 if x <= alpha \ No newline at end of file + +# | HARD_SIGMOID : Hard sigmoid activation: f(x) = max(0, min(1, alpha * x + beta)) (not sure if there is this in Pytorch?) +# | SCALED_TANH : Scaled Tanh activation: f(x) = alpha * tanh(beta * x) (not sure if there is this in Pytorch?) +# | THRESHOLDED_RELU : Thresholded Relu activation: f(x) = x if x > alpha, f(x) = 0 if x <= alpha (not sure if there is this in Pytorch?) 
\ No newline at end of file From 384384e5f890fedaab329098b34123beebfd7d9c Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 8 Oct 2019 14:25:10 -0700 Subject: [PATCH 224/355] added network->dot graph util method --- torch2trt/torch2trt.py | 20 ++++++++++---- torch2trt/utils.py | 62 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 6 deletions(-) create mode 100644 torch2trt/utils.py diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index 3715a072..fecb8718 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -194,7 +194,7 @@ def wrapper(*args, **kwargs): ctx.method_kwargs = kwargs ctx.method_return = outputs - #print('%s : %s' % (method.__qualname__, converter.__name__)) +# print('%s' % (converter.__name__,)) converter(ctx) # convert to None so conversion will fail for unsupported layers @@ -330,13 +330,16 @@ def forward(self, *inputs): def torch2trt(module, inputs, input_names=None, output_names=None, log_level=trt.Logger.ERROR, max_batch_size=1, - fp16_mode=False, max_workspace_size=0, strict_type_constraints=False): + fp16_mode=False, max_workspace_size=0, strict_type_constraints=False, keep_network=True): # copy inputs to avoid modifications to source data inputs = [tensor.clone() for tensor in inputs] - - with trt.Logger(log_level) as logger, trt.Builder(logger) as builder,\ - builder.create_network() as network, ConversionContext(network) as ctx: + + logger = trt.Logger(log_level) + builder = trt.Builder(logger) + network = builder.create_network() + + with ConversionContext(network) as ctx: if isinstance(inputs, list): inputs = tuple(inputs) @@ -357,7 +360,12 @@ def torch2trt(module, inputs, input_names=None, output_names=None, log_level=trt engine = builder.build_cuda_engine(network) - return TRTModule(engine, ctx.input_names, ctx.output_names) + module_trt = TRTModule(engine, ctx.input_names, ctx.output_names) + + if keep_network=True: + module_trt.network = network + + return module_trt # DEFINE ALL CONVERSION FUNCTIONS diff --git a/torch2trt/utils.py b/torch2trt/utils.py new file mode 100644 index 00000000..370c55a8 --- /dev/null +++ b/torch2trt/utils.py @@ -0,0 +1,62 @@ +import graphviz + + +def trt_network_to_dot_graph(network): + dot = Digraph(comment='Network') + + # add nodes (layers) + for i in range(network.num_layers): + layer = network.get_layer(i) + dot.node(layer.name) + + # add nodes (inputs) + for i in range(network.num_inputs): + dot.node(network.get_input(i).name) + + # add nodes (outputs) + for i in range(network.num_outputs): + dot.node(network.get_output(i).name) + + # add layer->layer edges + for a in range(network.num_layers): + layer_a = network.get_layer(a) + + for b in range(network.num_layers): + layer_b = network.get_layer(b) + + for i in range(layer_a.num_outputs): + output_i = layer_a.get_output(i) + + for j in range(layer_b.num_inputs): + input_j = layer_b.get_input(j) + + if output_i == input_j: + dot.edge(layer_a.name, layer_b.name, label=str(input_j.shape)) + + # add input->layer edges + for i in range(network.num_inputs): + input_i = network.get_input(i) + + for b in range(network.num_layers): + layer_b = network.get_layer(b) + + for j in range(layer_b.num_inputs): + input_j = layer_b.get_input(j) + + if input_i == input_j: + dot.edge(input_i.name, layer_b.name, label=str(input_j.shape)) + + # add layer->output edges + for i in range(network.num_outputs): + input_i = network.get_output(i) + + for b in range(network.num_layers): + layer_b = network.get_layer(b) + + for j in 
range(layer_b.num_outputs): + input_j = layer_b.get_output(j) + + if input_i == input_j: + dot.edge(layer_b.name, input_i.name, label=str(input_j.shape)) + + return dot \ No newline at end of file From e0b0e7167fe3eea35e6034d4401f31f44979926c Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 8 Oct 2019 14:25:10 -0700 Subject: [PATCH 225/355] added network->dot graph util method --- torch2trt/torch2trt.py | 20 ++++++++++---- torch2trt/utils.py | 62 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 6 deletions(-) create mode 100644 torch2trt/utils.py diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index 3715a072..b39f4388 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -194,7 +194,7 @@ def wrapper(*args, **kwargs): ctx.method_kwargs = kwargs ctx.method_return = outputs - #print('%s : %s' % (method.__qualname__, converter.__name__)) +# print('%s' % (converter.__name__,)) converter(ctx) # convert to None so conversion will fail for unsupported layers @@ -330,13 +330,16 @@ def forward(self, *inputs): def torch2trt(module, inputs, input_names=None, output_names=None, log_level=trt.Logger.ERROR, max_batch_size=1, - fp16_mode=False, max_workspace_size=0, strict_type_constraints=False): + fp16_mode=False, max_workspace_size=0, strict_type_constraints=False, keep_network=True): # copy inputs to avoid modifications to source data inputs = [tensor.clone() for tensor in inputs] - - with trt.Logger(log_level) as logger, trt.Builder(logger) as builder,\ - builder.create_network() as network, ConversionContext(network) as ctx: + + logger = trt.Logger(log_level) + builder = trt.Builder(logger) + network = builder.create_network() + + with ConversionContext(network) as ctx: if isinstance(inputs, list): inputs = tuple(inputs) @@ -357,7 +360,12 @@ def torch2trt(module, inputs, input_names=None, output_names=None, log_level=trt engine = builder.build_cuda_engine(network) - return TRTModule(engine, ctx.input_names, ctx.output_names) + module_trt = TRTModule(engine, ctx.input_names, ctx.output_names) + + if keep_network: + module_trt.network = network + + return module_trt # DEFINE ALL CONVERSION FUNCTIONS diff --git a/torch2trt/utils.py b/torch2trt/utils.py new file mode 100644 index 00000000..370c55a8 --- /dev/null +++ b/torch2trt/utils.py @@ -0,0 +1,62 @@ +import graphviz + + +def trt_network_to_dot_graph(network): + dot = Digraph(comment='Network') + + # add nodes (layers) + for i in range(network.num_layers): + layer = network.get_layer(i) + dot.node(layer.name) + + # add nodes (inputs) + for i in range(network.num_inputs): + dot.node(network.get_input(i).name) + + # add nodes (outputs) + for i in range(network.num_outputs): + dot.node(network.get_output(i).name) + + # add layer->layer edges + for a in range(network.num_layers): + layer_a = network.get_layer(a) + + for b in range(network.num_layers): + layer_b = network.get_layer(b) + + for i in range(layer_a.num_outputs): + output_i = layer_a.get_output(i) + + for j in range(layer_b.num_inputs): + input_j = layer_b.get_input(j) + + if output_i == input_j: + dot.edge(layer_a.name, layer_b.name, label=str(input_j.shape)) + + # add input->layer edges + for i in range(network.num_inputs): + input_i = network.get_input(i) + + for b in range(network.num_layers): + layer_b = network.get_layer(b) + + for j in range(layer_b.num_inputs): + input_j = layer_b.get_input(j) + + if input_i == input_j: + dot.edge(input_i.name, layer_b.name, label=str(input_j.shape)) + + # add layer->output edges + for i in 
range(network.num_outputs): + input_i = network.get_output(i) + + for b in range(network.num_layers): + layer_b = network.get_layer(b) + + for j in range(layer_b.num_outputs): + input_j = layer_b.get_output(j) + + if input_i == input_j: + dot.edge(layer_b.name, input_i.name, label=str(input_j.shape)) + + return dot \ No newline at end of file From 323f1d3e9e1017e8498220ddc7bb04dbbe5b10ae Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 8 Oct 2019 14:36:52 -0700 Subject: [PATCH 226/355] graphviz module prefix fix --- torch2trt/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch2trt/utils.py b/torch2trt/utils.py index 370c55a8..c3423328 100644 --- a/torch2trt/utils.py +++ b/torch2trt/utils.py @@ -2,7 +2,7 @@ def trt_network_to_dot_graph(network): - dot = Digraph(comment='Network') + dot = graphviz.Digraph(comment='Network') # add nodes (layers) for i in range(network.num_layers): From e83730e864c3fcfbba1540276b876627d362363c Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sun, 20 Oct 2019 12:29:28 -0700 Subject: [PATCH 227/355] fixed Linear w.o bias --- torch2trt/converters/Linear.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/torch2trt/converters/Linear.py b/torch2trt/converters/Linear.py index 28534478..ea8cdd44 100644 --- a/torch2trt/converters/Linear.py +++ b/torch2trt/converters/Linear.py @@ -13,12 +13,16 @@ def convert_Linear(ctx): layer = ctx.network.add_shuffle(input_trt) layer.reshape_dims = (-1, 1, 1) + bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype)) + if module.bias is not None: + bias = module.bias.detach().cpu().numpy() + # add fully connected layer = ctx.network.add_fully_connected( input=layer.get_output(0), num_outputs=module.out_features, kernel=module.weight.detach().cpu().numpy(), - bias=module.bias.detach().cpu().numpy()) + bias=bias) # reshape back to N layer = ctx.network.add_shuffle(layer.get_output(0)) @@ -30,3 +34,8 @@ def convert_Linear(ctx): @add_module_test(torch.float32, torch.device('cuda'), [(1, 10)]) def test_Linear_basic(): return torch.nn.Linear(10, 5) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10)]) +def test_Linear_no_bias(): + return torch.nn.Linear(10, 5, bias=False) \ No newline at end of file From 3fac66b8197d5ef54a66cd7193243e38205df21c Mon Sep 17 00:00:00 2001 From: John Welsh Date: Mon, 21 Oct 2019 15:47:34 -0700 Subject: [PATCH 228/355] handle non-existance torch method with existing converter --- torch2trt/torch2trt.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index b39f4388..1575a946 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -214,17 +214,23 @@ class ConversionHook(object): def __init__(self, ctx, method, converter): self.ctx = ctx self.method_str = method - self.method_impl = eval(method) self.converter = converter def _set_method(self, method): exec('%s = method' % self.method_str) def __enter__(self): - self._set_method(attach_converter(self.ctx, self.method_impl, self.converter)) + try: + self.method_impl = eval(self.method_str) + except AttributeError: + self.method_impl = None + + if self.method_impl: + self._set_method(attach_converter(self.ctx, self.method_impl, self.converter)) def __exit__(self, type, val, tb): - self._set_method(self.method_impl) + if self.method_impl: + self._set_method(self.method_impl) class ConversionContext(object): From c76f141ff72b13ee11b193d7946d1b47113af654 Mon Sep 17 00:00:00 2001 From: John Welsh Date: 
Mon, 21 Oct 2019 16:00:42 -0700 Subject: [PATCH 229/355] added profiling --- torch2trt/torch2trt.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index 1575a946..8308d796 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -333,6 +333,10 @@ def forward(self, *inputs): outputs = outputs[0] return outputs + + def enable_profiling(self): + if not self.context.profiler: + self.context.profiler = trt.Profiler() def torch2trt(module, inputs, input_names=None, output_names=None, log_level=trt.Logger.ERROR, max_batch_size=1, From ace0abdbc007248c69911cc4bf810579fe585c0a Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 8 Oct 2019 14:25:10 -0700 Subject: [PATCH 230/355] added network->dot graph util method --- torch2trt/torch2trt.py | 20 ++++++++++---- torch2trt/utils.py | 62 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 6 deletions(-) create mode 100644 torch2trt/utils.py diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index 3715a072..fecb8718 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -194,7 +194,7 @@ def wrapper(*args, **kwargs): ctx.method_kwargs = kwargs ctx.method_return = outputs - #print('%s : %s' % (method.__qualname__, converter.__name__)) +# print('%s' % (converter.__name__,)) converter(ctx) # convert to None so conversion will fail for unsupported layers @@ -330,13 +330,16 @@ def forward(self, *inputs): def torch2trt(module, inputs, input_names=None, output_names=None, log_level=trt.Logger.ERROR, max_batch_size=1, - fp16_mode=False, max_workspace_size=0, strict_type_constraints=False): + fp16_mode=False, max_workspace_size=0, strict_type_constraints=False, keep_network=True): # copy inputs to avoid modifications to source data inputs = [tensor.clone() for tensor in inputs] - - with trt.Logger(log_level) as logger, trt.Builder(logger) as builder,\ - builder.create_network() as network, ConversionContext(network) as ctx: + + logger = trt.Logger(log_level) + builder = trt.Builder(logger) + network = builder.create_network() + + with ConversionContext(network) as ctx: if isinstance(inputs, list): inputs = tuple(inputs) @@ -357,7 +360,12 @@ def torch2trt(module, inputs, input_names=None, output_names=None, log_level=trt engine = builder.build_cuda_engine(network) - return TRTModule(engine, ctx.input_names, ctx.output_names) + module_trt = TRTModule(engine, ctx.input_names, ctx.output_names) + + if keep_network=True: + module_trt.network = network + + return module_trt # DEFINE ALL CONVERSION FUNCTIONS diff --git a/torch2trt/utils.py b/torch2trt/utils.py new file mode 100644 index 00000000..370c55a8 --- /dev/null +++ b/torch2trt/utils.py @@ -0,0 +1,62 @@ +import graphviz + + +def trt_network_to_dot_graph(network): + dot = Digraph(comment='Network') + + # add nodes (layers) + for i in range(network.num_layers): + layer = network.get_layer(i) + dot.node(layer.name) + + # add nodes (inputs) + for i in range(network.num_inputs): + dot.node(network.get_input(i).name) + + # add nodes (outputs) + for i in range(network.num_outputs): + dot.node(network.get_output(i).name) + + # add layer->layer edges + for a in range(network.num_layers): + layer_a = network.get_layer(a) + + for b in range(network.num_layers): + layer_b = network.get_layer(b) + + for i in range(layer_a.num_outputs): + output_i = layer_a.get_output(i) + + for j in range(layer_b.num_inputs): + input_j = layer_b.get_input(j) + + if output_i == input_j: + dot.edge(layer_a.name, layer_b.name, 
label=str(input_j.shape)) + + # add input->layer edges + for i in range(network.num_inputs): + input_i = network.get_input(i) + + for b in range(network.num_layers): + layer_b = network.get_layer(b) + + for j in range(layer_b.num_inputs): + input_j = layer_b.get_input(j) + + if input_i == input_j: + dot.edge(input_i.name, layer_b.name, label=str(input_j.shape)) + + # add layer->output edges + for i in range(network.num_outputs): + input_i = network.get_output(i) + + for b in range(network.num_layers): + layer_b = network.get_layer(b) + + for j in range(layer_b.num_outputs): + input_j = layer_b.get_output(j) + + if input_i == input_j: + dot.edge(layer_b.name, input_i.name, label=str(input_j.shape)) + + return dot \ No newline at end of file From f7dbb969be10f40c735962076e4587d1310b29e6 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 8 Oct 2019 14:25:10 -0700 Subject: [PATCH 231/355] added network->dot graph util method --- torch2trt/torch2trt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index fecb8718..b39f4388 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -362,7 +362,7 @@ def torch2trt(module, inputs, input_names=None, output_names=None, log_level=trt module_trt = TRTModule(engine, ctx.input_names, ctx.output_names) - if keep_network=True: + if keep_network: module_trt.network = network return module_trt From 84101a9f1eaf78b5cf2538a95267320f74ab3efa Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 8 Oct 2019 14:36:52 -0700 Subject: [PATCH 232/355] graphviz module prefix fix --- torch2trt/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch2trt/utils.py b/torch2trt/utils.py index 370c55a8..c3423328 100644 --- a/torch2trt/utils.py +++ b/torch2trt/utils.py @@ -2,7 +2,7 @@ def trt_network_to_dot_graph(network): - dot = Digraph(comment='Network') + dot = graphviz.Digraph(comment='Network') # add nodes (layers) for i in range(network.num_layers): From 97260f29f8f2c8a938f219f6f21875860ddcbeb4 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Mon, 21 Oct 2019 15:47:34 -0700 Subject: [PATCH 233/355] handle non-existance torch method with existing converter --- torch2trt/torch2trt.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index b39f4388..1575a946 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -214,17 +214,23 @@ class ConversionHook(object): def __init__(self, ctx, method, converter): self.ctx = ctx self.method_str = method - self.method_impl = eval(method) self.converter = converter def _set_method(self, method): exec('%s = method' % self.method_str) def __enter__(self): - self._set_method(attach_converter(self.ctx, self.method_impl, self.converter)) + try: + self.method_impl = eval(self.method_str) + except AttributeError: + self.method_impl = None + + if self.method_impl: + self._set_method(attach_converter(self.ctx, self.method_impl, self.converter)) def __exit__(self, type, val, tb): - self._set_method(self.method_impl) + if self.method_impl: + self._set_method(self.method_impl) class ConversionContext(object): From 0a2c3eb2f22c985c29f5c00059bbf20eb181ed50 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Mon, 21 Oct 2019 16:00:42 -0700 Subject: [PATCH 234/355] added profiling --- torch2trt/torch2trt.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index 1575a946..8308d796 100644 --- a/torch2trt/torch2trt.py 
+++ b/torch2trt/torch2trt.py
@@ -333,6 +333,10 @@ def forward(self, *inputs):
             outputs = outputs[0]
 
         return outputs
+    
+    def enable_profiling(self):
+        if not self.context.profiler:
+            self.context.profiler = trt.Profiler()
 
 
 def torch2trt(module, inputs, input_names=None, output_names=None, log_level=trt.Logger.ERROR, max_batch_size=1,

From e520dd22ccc5d9b417c486dc6bda9bd314cf8997 Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Mon, 28 Oct 2019 15:24:38 -0700
Subject: [PATCH 235/355] added dummy converters

---
 torch2trt/converters/__init__.py         |  6 +++++
 torch2trt/converters/dummy_converters.py | 31 ++++++++++++++++++++++++
 torch2trt/torch2trt.py                   | 19 +++++++++------
 3 files changed, 48 insertions(+), 8 deletions(-)
 create mode 100644 torch2trt/converters/dummy_converters.py

diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py
index 967c2b3e..994d5875 100644
--- a/torch2trt/converters/__init__.py
+++ b/torch2trt/converters/__init__.py
@@ -1,3 +1,9 @@
+# dummy converters throw warnings when a method is encountered
+
+from .dummy_converters import *
+
+# supported converters will override dummy converters
+
 from .activation import *
 from .adaptive_avg_pool2d import *
 from .AdaptiveAvgPool2d import *
diff --git a/torch2trt/converters/dummy_converters.py b/torch2trt/converters/dummy_converters.py
new file mode 100644
index 00000000..c5b06fe6
--- /dev/null
+++ b/torch2trt/converters/dummy_converters.py
@@ -0,0 +1,31 @@
+from torch2trt.torch2trt import *
+
+
+def is_private(method):
+    method = method.split('.')[-1] # remove prefix
+    return method[0] == '_' and method[1] != '_'
+
+def is_function_type(method):
+    fntype = eval(method + '.__class__.__name__')
+    return fntype == 'function' or fntype == 'builtin_function_or_method' or fntype == 'method_descriptor'
+
+def get_methods(namespace):
+    methods = []
+    for method in dir(eval(namespace)):
+        full_method = namespace + '.' 
+ method + if not is_private(full_method) and is_function_type(full_method): + methods.append(full_method) + return methods + + +TORCH_METHODS = [] +TORCH_METHODS += get_methods('torch') +TORCH_METHODS += get_methods('torch.Tensor') +TORCH_METHODS += get_methods('torch.nn.functional') + + +for method in TORCH_METHODS: + + @tensorrt_converter(method, is_real=False) + def warn_method(ctx): + print('Warning: Encountered unsupported method %s' % ctx.method_str) \ No newline at end of file diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index 8308d796..7ccd64a0 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -175,15 +175,17 @@ def get_arg(ctx, name, pos, default): return default -def attach_converter(ctx, method, converter): +def attach_converter(ctx, method, converter, method_str): """Gets a function that executes PyTorch method and TensorRT converter""" - + global DUMMY_CONVERTERS + def wrapper(*args, **kwargs): skip = True - + # check if another (parent) converter has lock if not ctx.lock: - ctx.lock = True + if converter['is_real']: + ctx.lock = True # only real converters can acquire lock skip = False # run original method @@ -193,9 +195,10 @@ def wrapper(*args, **kwargs): ctx.method_args = args ctx.method_kwargs = kwargs ctx.method_return = outputs + ctx.method_str = method_str # print('%s' % (converter.__name__,)) - converter(ctx) + converter['converter'](ctx) # convert to None so conversion will fail for unsupported layers ctx.method_args = None @@ -226,7 +229,7 @@ def __enter__(self): self.method_impl = None if self.method_impl: - self._set_method(attach_converter(self.ctx, self.method_impl, self.converter)) + self._set_method(attach_converter(self.ctx, self.method_impl, self.converter, self.method_str)) def __exit__(self, type, val, tb): if self.method_impl: @@ -381,8 +384,8 @@ def torch2trt(module, inputs, input_names=None, output_names=None, log_level=trt # DEFINE ALL CONVERSION FUNCTIONS -def tensorrt_converter(method): +def tensorrt_converter(method, is_real=True): def register_converter(converter): - CONVERTERS[method] = converter + CONVERTERS[method] = {'converter': converter, 'is_real': is_real} return converter return register_converter From 5ab156192f5dc091c9f89d1ec3eb7688b95d0063 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Mon, 28 Oct 2019 15:34:52 -0700 Subject: [PATCH 236/355] pass for Tensor.dim Tensor.size dummy converters --- torch2trt/converters/dummy_converters.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/torch2trt/converters/dummy_converters.py b/torch2trt/converters/dummy_converters.py index c5b06fe6..8f790e79 100644 --- a/torch2trt/converters/dummy_converters.py +++ b/torch2trt/converters/dummy_converters.py @@ -28,4 +28,10 @@ def get_methods(namespace): @tensorrt_converter(method, is_real=False) def warn_method(ctx): - print('Warning: Encountered unsupported method %s' % ctx.method_str) \ No newline at end of file + print('Warning: Encountered known unsupported method %s' % ctx.method_str) + + +@tensorrt_converter('torch.Tensor.dim', is_real=False) +@tensorrt_converter('torch.Tensor.size', is_real=False) +def dont_warn(ctx): + pass \ No newline at end of file From b967c991342d50e43a251f633d632246e338787f Mon Sep 17 00:00:00 2001 From: John Welsh Date: Mon, 28 Oct 2019 16:35:58 -0700 Subject: [PATCH 237/355] incremented patch --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b15ea7c5..0e8ded10 100644 --- a/setup.py +++ b/setup.py @@ -115,7 
+115,7 @@ def run(self): setup( name='torch2trt', - version='0.0.1', + version='0.0.2', description='An easy to use PyTorch to TensorRT converter', cmdclass={ 'install': InstallCommand, From a32e68438f23b8287b5221b183a9feaace72ecc1 Mon Sep 17 00:00:00 2001 From: Kshitij Srivastava Date: Tue, 19 Nov 2019 12:54:35 -0500 Subject: [PATCH 238/355] fixed upsampling<->interpolate translation --- .../converters/interpolate/interpolate.py | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/torch2trt/converters/interpolate/interpolate.py b/torch2trt/converters/interpolate/interpolate.py index 691f0c3c..5d0ba9a4 100644 --- a/torch2trt/converters/interpolate/interpolate.py +++ b/torch2trt/converters/interpolate/interpolate.py @@ -3,7 +3,7 @@ from torch2trt.torch2trt import * from torch2trt.module_test import add_module_test from .interpolate_pb2 import interpolate_Message - +import torch.nn as nn def get_interpolate_plugin(size, mode, align_corners): PLUGIN_NAME = 'interpolate' @@ -20,12 +20,12 @@ def convert_interpolate(ctx): output = ctx.method_return try: - mode = ctx.method_kwargs['mode'] + mode = get_arg(ctx, 'mode', pos=3, default='nearest') except KeyError: mode = 'nearest' try: - align_corners = ctx.method_kwargs['align_corners'] + align_corners = get_arg(ctx, 'align_corners', pos=4, default=None) except KeyError: align_corners = False @@ -40,14 +40,18 @@ def convert_interpolate(ctx): class Interpolate(torch.nn.Module): - def __init__(self, size, mode, align_corners): + def __init__(self, size, mode, align_corners,scale_factor=None): super(Interpolate, self).__init__() self.size = size self.mode = mode self.align_corners = align_corners + self.scale_factor=scale_factor def forward(self, x): - return F.interpolate(x, self.size, mode=self.mode, align_corners=self.align_corners) + if self.scale_factor == None: + return F.interpolate(x, self.size, mode=self.mode, align_corners=self.align_corners) + else: + return F.interpolate(x, scale_factor = self.scale_factor, mode = self.mode, align_corners = self.align_corners) @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)]) @@ -68,3 +72,11 @@ def test_interpolate_bicubic(): @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)]) def test_interpolate_area(): return Interpolate((56, 56), 'area', None) + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 256, 192, 512)]) +def test_scale(): + return Interpolate(None,'bilinear',False,scale_factor=2) + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 256, 192, 512)]) +def test_nn_scale(): + return nn.Upsample(scale_factor = 2, mode='bilinear',align_corners=False) \ No newline at end of file From af560184d29a7e4c8cc6d4990a069218f787fd50 Mon Sep 17 00:00:00 2001 From: Kshitij Srivastava Date: Tue, 19 Nov 2019 14:36:41 -0500 Subject: [PATCH 239/355] reverted changes to fix build --- torch2trt/converters/interpolate/interpolate.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/torch2trt/converters/interpolate/interpolate.py b/torch2trt/converters/interpolate/interpolate.py index 5d0ba9a4..571c7a66 100644 --- a/torch2trt/converters/interpolate/interpolate.py +++ b/torch2trt/converters/interpolate/interpolate.py @@ -40,18 +40,14 @@ def convert_interpolate(ctx): class Interpolate(torch.nn.Module): - def __init__(self, size, mode, align_corners,scale_factor=None): + def __init__(self, size, mode, align_corners): super(Interpolate, self).__init__() self.size = size self.mode = mode 
self.align_corners = align_corners - self.scale_factor=scale_factor def forward(self, x): - if self.scale_factor == None: - return F.interpolate(x, self.size, mode=self.mode, align_corners=self.align_corners) - else: - return F.interpolate(x, scale_factor = self.scale_factor, mode = self.mode, align_corners = self.align_corners) + return F.interpolate(x, self.size, mode=self.mode, align_corners=self.align_corners) @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)]) @@ -73,10 +69,6 @@ def test_interpolate_bicubic(): def test_interpolate_area(): return Interpolate((56, 56), 'area', None) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 256, 192, 512)]) -def test_scale(): - return Interpolate(None,'bilinear',False,scale_factor=2) - @add_module_test(torch.float32, torch.device('cuda'), [(1, 256, 192, 512)]) def test_nn_scale(): return nn.Upsample(scale_factor = 2, mode='bilinear',align_corners=False) \ No newline at end of file From 4da14ed3b22c53328a3d88073233e9ce4d33db4c Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sat, 23 Nov 2019 12:05:01 -0800 Subject: [PATCH 240/355] renamed test nn_scale->upsample_scale_factor2 --- torch2trt/converters/interpolate/interpolate.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/torch2trt/converters/interpolate/interpolate.py b/torch2trt/converters/interpolate/interpolate.py index 571c7a66..996aea77 100644 --- a/torch2trt/converters/interpolate/interpolate.py +++ b/torch2trt/converters/interpolate/interpolate.py @@ -69,6 +69,6 @@ def test_interpolate_bicubic(): def test_interpolate_area(): return Interpolate((56, 56), 'area', None) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 256, 192, 512)]) -def test_nn_scale(): - return nn.Upsample(scale_factor = 2, mode='bilinear',align_corners=False) \ No newline at end of file +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)]) +def test_upsample_scale_factor2(): + return nn.Upsample(scale_factor=2, mode='bilinear',align_corners=False) \ No newline at end of file From 112922b00421d8bae5f225587989a94e63134e5a Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 26 Nov 2019 14:57:07 -0800 Subject: [PATCH 241/355] added int8 support --- torch2trt/calibration.py | 65 ++++++++++++++++++++++++++++++++++++++++ torch2trt/torch2trt.py | 45 +++++++++++++++++++++------- 2 files changed, 99 insertions(+), 11 deletions(-) create mode 100644 torch2trt/calibration.py diff --git a/torch2trt/calibration.py b/torch2trt/calibration.py new file mode 100644 index 00000000..07f03cc7 --- /dev/null +++ b/torch2trt/calibration.py @@ -0,0 +1,65 @@ +import torch +import tensorrt as trt + + +class TensorBatchDataset(): + + def __init__(self, tensors): + self.tensors = tensors + + def __len__(self): + return len(self.tensors[0]) + + def __getitem__(self, idx): + return [t[idx] for t in self.tensors] + + +class DatasetCalibrator(trt.IInt8Calibrator): + + def __init__(self, dataset, batch_size=1, algorithm=trt.CalibrationAlgoType.ENTROPY_CALIBRATION_2): + super().__init__() + + self.dataset = dataset + self.batch_size = batch_size + self.algorithm = algorithm + + # pull sample, should not include batch dimension + inputs = dataset[0] + + # create buffers that will hold random data batches + self.buffers = [] + for tensor in inputs: + size = (batch_size,) + tuple(tensor.shape) + buf = torch.randn(size=size, dtype=tensor.dtype, device=tensor.device).contiguous() + self.buffers.append(buf) + + self.count = 0 + + def get_batch(self, *args, **kwargs): + 
if self.count < len(self.dataset): + + for i in range(self.batch_size): + + idx = self.count % len(self.dataset) # roll around if not multiple of dataset + inputs = self.dataset[idx] + + for j, tensor in enumerate(inputs): + self.buffers[j][i].copy_(tensor) + + self.count += 1 + + return [int(buf.data_ptr()) for buf in self.buffers] + else: + return [] + + def get_algorithm(self): + return self.algorithm + + def get_batch_size(self): + return self.batch_size + + def read_calibration_cache(self): + return None + + def write_calibration_cache(self, cache): + pass \ No newline at end of file diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index 7ccd64a0..9656df63 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -2,6 +2,7 @@ import tensorrt as trt from copy import copy import numpy as np +from .calibration import TensorBatchDataset, DatasetCalibrator # UTILITY FUNCTIONS @@ -342,11 +343,22 @@ def enable_profiling(self): self.context.profiler = trt.Profiler() -def torch2trt(module, inputs, input_names=None, output_names=None, log_level=trt.Logger.ERROR, max_batch_size=1, - fp16_mode=False, max_workspace_size=0, strict_type_constraints=False, keep_network=True): +def torch2trt(module, + inputs, + input_names=None, + output_names=None, + log_level=trt.Logger.ERROR, + max_batch_size=1, + fp16_mode=False, + max_workspace_size=0, + strict_type_constraints=False, + keep_network=True, + int8_mode=False, + int8_calib_dataset=None, + int8_calib_algorithm=trt.CalibrationAlgoType.ENTROPY_CALIBRATION_2): # copy inputs to avoid modifications to source data - inputs = [tensor.clone() for tensor in inputs] + inputs = [tensor.clone()[0:1] for tensor in inputs] # only run single entry logger = trt.Logger(log_level) builder = trt.Builder(logger) @@ -366,17 +378,28 @@ def torch2trt(module, inputs, input_names=None, output_names=None, log_level=trt outputs = (outputs, ) ctx.mark_outputs(outputs, output_names) - builder.max_workspace_size = max_workspace_size - builder.fp16_mode = fp16_mode - builder.max_batch_size = max_batch_size - builder.strict_type_constraints = strict_type_constraints + builder.max_workspace_size = max_workspace_size + builder.fp16_mode = fp16_mode + builder.max_batch_size = max_batch_size + builder.strict_type_constraints = strict_type_constraints + + if int8_mode: + + # default to use input tensors for calibration + if int8_calib_dataset is None: + int8_calib_dataset = TensorBatchDataset(inputs) + + builder.int8_mode = True + + # @TODO(jwelsh): Should we set batch_size=max_batch_size? 
Need to investigate memory consumption + builder.int8_calibrator = DatasetCalibrator(int8_calib_dataset, batch_size=1, algorithm=int8_calib_algorithm) - engine = builder.build_cuda_engine(network) + engine = builder.build_cuda_engine(network) - module_trt = TRTModule(engine, ctx.input_names, ctx.output_names) + module_trt = TRTModule(engine, ctx.input_names, ctx.output_names) - if keep_network: - module_trt.network = network + if keep_network: + module_trt.network = network return module_trt From 46bb0dfe80c75c0006a2ef72e328cb7318d72f5c Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 26 Nov 2019 15:02:43 -0800 Subject: [PATCH 242/355] minor refactor/comment fix of DatasetCalibrator --- torch2trt/calibration.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/torch2trt/calibration.py b/torch2trt/calibration.py index 07f03cc7..e16dee1d 100644 --- a/torch2trt/calibration.py +++ b/torch2trt/calibration.py @@ -26,11 +26,11 @@ def __init__(self, dataset, batch_size=1, algorithm=trt.CalibrationAlgoType.ENTR # pull sample, should not include batch dimension inputs = dataset[0] - # create buffers that will hold random data batches + # create buffers that will hold data batches self.buffers = [] for tensor in inputs: size = (batch_size,) + tuple(tensor.shape) - buf = torch.randn(size=size, dtype=tensor.dtype, device=tensor.device).contiguous() + buf = torch.zeros(size=size, dtype=tensor.dtype, device=tensor.device).contiguous() self.buffers.append(buf) self.count = 0 @@ -43,6 +43,7 @@ def get_batch(self, *args, **kwargs): idx = self.count % len(self.dataset) # roll around if not multiple of dataset inputs = self.dataset[idx] + # copy data for (input_idx, dataset_idx) into buffer for j, tensor in enumerate(inputs): self.buffers[j][i].copy_(tensor) From a7c35d57cae26aaad083bd30792c080a5c44d94e Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 26 Nov 2019 15:50:20 -0800 Subject: [PATCH 243/355] handle loose dataset types --- torch2trt/calibration.py | 7 ++----- torch2trt/torch2trt.py | 6 ++++-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/torch2trt/calibration.py b/torch2trt/calibration.py index e16dee1d..cb5ca197 100644 --- a/torch2trt/calibration.py +++ b/torch2trt/calibration.py @@ -16,20 +16,17 @@ def __getitem__(self, idx): class DatasetCalibrator(trt.IInt8Calibrator): - def __init__(self, dataset, batch_size=1, algorithm=trt.CalibrationAlgoType.ENTROPY_CALIBRATION_2): + def __init__(self, inputs, dataset, batch_size=1, algorithm=trt.CalibrationAlgoType.ENTROPY_CALIBRATION_2): super().__init__() self.dataset = dataset self.batch_size = batch_size self.algorithm = algorithm - # pull sample, should not include batch dimension - inputs = dataset[0] - # create buffers that will hold data batches self.buffers = [] for tensor in inputs: - size = (batch_size,) + tuple(tensor.shape) + size = (batch_size,) + tuple(tensor.shape[1:]) buf = torch.zeros(size=size, dtype=tensor.dtype, device=tensor.device).contiguous() self.buffers.append(buf) diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index 9656df63..092b6ac9 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -357,6 +357,8 @@ def torch2trt(module, int8_calib_dataset=None, int8_calib_algorithm=trt.CalibrationAlgoType.ENTROPY_CALIBRATION_2): + inputs_in = inputs + # copy inputs to avoid modifications to source data inputs = [tensor.clone()[0:1] for tensor in inputs] # only run single entry @@ -387,12 +389,12 @@ def torch2trt(module, # default to use input tensors for calibration if 
int8_calib_dataset is None: - int8_calib_dataset = TensorBatchDataset(inputs) + int8_calib_dataset = TensorBatchDataset(inputs_in) builder.int8_mode = True # @TODO(jwelsh): Should we set batch_size=max_batch_size? Need to investigate memory consumption - builder.int8_calibrator = DatasetCalibrator(int8_calib_dataset, batch_size=1, algorithm=int8_calib_algorithm) + builder.int8_calibrator = DatasetCalibrator(inputs, int8_calib_dataset, batch_size=1, algorithm=int8_calib_algorithm) engine = builder.build_cuda_engine(network) From 8e6e475db78061a815665ed7a00afbcc289def30 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 26 Nov 2019 20:47:17 -0800 Subject: [PATCH 244/355] fixed int8 support for TensorRT 5.0 --- torch2trt/calibration.py | 12 +++++++++--- torch2trt/torch2trt.py | 4 ++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/torch2trt/calibration.py b/torch2trt/calibration.py index cb5ca197..a237f0f3 100644 --- a/torch2trt/calibration.py +++ b/torch2trt/calibration.py @@ -2,6 +2,12 @@ import tensorrt as trt +if trt.__version__ >= '5.1': + DEFAULT_CALIBRATION_ALGORITHM = trt.CalibrationAlgoType.ENTROPY_CALIBRATION_2 +else: + DEFAULT_CALIBRATION_ALGORITHM = trt.CalibrationAlgoType.ENTROPY_CALIBRATION + + class TensorBatchDataset(): def __init__(self, tensors): @@ -16,7 +22,7 @@ def __getitem__(self, idx): class DatasetCalibrator(trt.IInt8Calibrator): - def __init__(self, inputs, dataset, batch_size=1, algorithm=trt.CalibrationAlgoType.ENTROPY_CALIBRATION_2): + def __init__(self, inputs, dataset, batch_size=1, algorithm=DEFAULT_CALIBRATION_ALGORITHM): super().__init__() self.dataset = dataset @@ -56,8 +62,8 @@ def get_algorithm(self): def get_batch_size(self): return self.batch_size - def read_calibration_cache(self): + def read_calibration_cache(self, *args, **kwargs): return None - def write_calibration_cache(self, cache): + def write_calibration_cache(self, cache, *args, **kwargs): pass \ No newline at end of file diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index 092b6ac9..bd860ab0 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -2,7 +2,7 @@ import tensorrt as trt from copy import copy import numpy as np -from .calibration import TensorBatchDataset, DatasetCalibrator +from .calibration import TensorBatchDataset, DatasetCalibrator, DEFAULT_CALIBRATION_ALGORITHM # UTILITY FUNCTIONS @@ -355,7 +355,7 @@ def torch2trt(module, keep_network=True, int8_mode=False, int8_calib_dataset=None, - int8_calib_algorithm=trt.CalibrationAlgoType.ENTROPY_CALIBRATION_2): + int8_calib_algorithm=DEFAULT_CALIBRATION_ALGORITHM): inputs_in = inputs From b8a54ac4aaa2992bcd14de36cd6499d463135f33 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 26 Nov 2019 21:00:58 -0800 Subject: [PATCH 245/355] fix int8_support python2 --- torch2trt/calibration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch2trt/calibration.py b/torch2trt/calibration.py index a237f0f3..9b4512cd 100644 --- a/torch2trt/calibration.py +++ b/torch2trt/calibration.py @@ -23,7 +23,7 @@ def __getitem__(self, idx): class DatasetCalibrator(trt.IInt8Calibrator): def __init__(self, inputs, dataset, batch_size=1, algorithm=DEFAULT_CALIBRATION_ALGORITHM): - super().__init__() + super(DatasetCalibrator, self).__init__() self.dataset = dataset self.batch_size = batch_size From f60509bd293a95b4299aa1314c362233dd93c39b Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 3 Dec 2019 14:44:01 -0800 Subject: [PATCH 246/355] refactor int8 buffer assignment using zip() --- 
torch2trt/calibration.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/torch2trt/calibration.py b/torch2trt/calibration.py index 9b4512cd..7506ea4e 100644 --- a/torch2trt/calibration.py +++ b/torch2trt/calibration.py @@ -47,8 +47,8 @@ def get_batch(self, *args, **kwargs): inputs = self.dataset[idx] # copy data for (input_idx, dataset_idx) into buffer - for j, tensor in enumerate(inputs): - self.buffers[j][i].copy_(tensor) + for buffer, tensor in zip(self.buffers, inputs): + buffer[i].copy_(tensor) self.count += 1 From 3a05d4a38b69e3b094e5b7361567090f7b4bee3a Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 3 Dec 2019 14:46:26 -0800 Subject: [PATCH 247/355] increment vers --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 0e8ded10..50df74ea 100644 --- a/setup.py +++ b/setup.py @@ -115,7 +115,7 @@ def run(self): setup( name='torch2trt', - version='0.0.2', + version='0.0.3', description='An easy to use PyTorch to TensorRT converter', cmdclass={ 'install': InstallCommand, From 23ac868470db777ef46582fa4d360818f0439197 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 12 Dec 2019 13:12:38 -0800 Subject: [PATCH 248/355] added instance norm from track_stats=True --- torch2trt/converters/__init__.py | 1 + torch2trt/converters/instance_norm.py | 91 +++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 torch2trt/converters/instance_norm.py diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index 994d5875..1e155093 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -20,6 +20,7 @@ from .ConvTranspose2d import * from .identity import * from .Identity import * +from .instance_norm import * from .Linear import * from .LogSoftmax import * from .max_pool2d import * diff --git a/torch2trt/converters/instance_norm.py b/torch2trt/converters/instance_norm.py new file mode 100644 index 00000000..ff13d662 --- /dev/null +++ b/torch2trt/converters/instance_norm.py @@ -0,0 +1,91 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +@tensorrt_converter('torch.instance_norm') +@tensorrt_converter('torch.nn.functional.instance_norm') +def convert_instance_norm(ctx): + input = get_arg(ctx, 'input', pos=0, default=None) + running_mean = get_arg(ctx, 'running_mean', pos=1, default=None) + running_var = get_arg(ctx, 'running_var', pos=2, default=None) + weight = get_arg(ctx, 'weight', pos=3, default=None) + bias = get_arg(ctx, 'bias', pos=4, default=None) + use_input_stats = get_arg(ctx, 'use_input_stats', pos=5, default=True) + momentum = get_arg(ctx, 'momentum', pos=6, default=0.1) + eps = get_arg(ctx, 'eps', pos=7, default=1e-05) + output = ctx.method_return + + + # CASE 1 - USING RUNNING STATISTICS + if not use_input_stats: + + # equivalent to batch norm + scale = 1.0 / np.sqrt(running_var.detach().cpu().numpy() + eps) + offset = -running_mean.detach().cpu().numpy() * scale + power = np.ones_like(scale) + + if weight is not None: + scale *= weight.detach().cpu().numpy() + + if bias is not None: + offset += bias.detach().cpu().numpy() + + input_trt = input._trt + + # force input to be NCHW if it is not + if input.ndim != 4: + + layer = ctx.network.add_shuffle(input_trt) + + if input.ndim == 2: + layer.reshape_dims = (input.shape[1], 1, 1) # NC -> NCHW + elif input.ndim == 3: + layer.reshape_dims = (input.shape[1], input.shape[2], 1) # NCH -> NCHW + elif input.ndim == 5: + layer.reshape_dims = 
(input.shape[1], input.shape[2], input.shape[3] * input.shape[4]) # NCHWD -> NCHW + + input_trt = layer.get_output(0) + + layer = ctx.network.add_scale(input_trt, trt.ScaleMode.CHANNEL, offset, scale, power) + + if input.ndim != 4: + + layer = ctx.network.add_shuffle(layer.get_output(0)) + layer.reshape_dims = tuple(output.shape[1:]) + + output._trt = layer.get_output(0) + + # CASE 2 - USING INPUT STATS + else: + + raise NotImplementedError + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3)]) +def test_instance_norm_1d_track_stats(): + return torch.nn.InstanceNorm1d(10, track_running_stats=True) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3)]) +def test_instance_norm_2d_track_stats(): + return torch.nn.InstanceNorm2d(10, track_running_stats=True) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3, 3)]) +def test_instance_norm_3d_track_stats(): + return torch.nn.InstanceNorm3d(10, track_running_stats=True) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3)]) +def test_instance_norm_1d_track_stats_affine(): + return torch.nn.InstanceNorm1d(10, affine=True, track_running_stats=True) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3)]) +def test_instance_norm_2d_track_stats_affine(): + return torch.nn.InstanceNorm2d(10, affine=True, track_running_stats=True) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3, 3)]) +def test_instance_norm_3d_track_stats_affine(): + return torch.nn.InstanceNorm3d(10, affine=True, track_running_stats=True) \ No newline at end of file From 5f30709201cd1a8af558ec8b791186a17d6c19b5 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 12 Dec 2019 16:22:15 -0800 Subject: [PATCH 249/355] instance norm --- torch2trt/converters/instance_norm.py | 126 +++++++++++++++++++------- 1 file changed, 93 insertions(+), 33 deletions(-) diff --git a/torch2trt/converters/instance_norm.py b/torch2trt/converters/instance_norm.py index ff13d662..e518236b 100644 --- a/torch2trt/converters/instance_norm.py +++ b/torch2trt/converters/instance_norm.py @@ -2,6 +2,27 @@ from torch2trt.module_test import add_module_test +def _add_scale_1d2d3d(network, x_trt, mode, offset, scale, power): + ndim = len(x_trt.shape) + + y_trt = x_trt + + # shape to 2D + if ndim != 3: + layer = network.add_shuffle(y_trt) + layer.reshape_dims = (x_trt.shape[0], x_trt.shape[1], -1) # NCH -> NCHW + y_trt = layer.get_output(0) + + y_trt = network.add_scale(y_trt, mode, offset, scale, power).get_output(0) + + # shape to original dimension + if ndim != 3: + layer = network.add_shuffle(layer.get_output(0)) + layer.reshape_dims = tuple(x_trt.shape) + y_trt = layer.get_output(0) + + return y_trt + @tensorrt_converter('torch.instance_norm') @tensorrt_converter('torch.nn.functional.instance_norm') def convert_instance_norm(ctx): @@ -26,66 +47,105 @@ def convert_instance_norm(ctx): if weight is not None: scale *= weight.detach().cpu().numpy() - - if bias is not None: offset += bias.detach().cpu().numpy() - input_trt = input._trt - - # force input to be NCHW if it is not - if input.ndim != 4: - - layer = ctx.network.add_shuffle(input_trt) - - if input.ndim == 2: - layer.reshape_dims = (input.shape[1], 1, 1) # NC -> NCHW - elif input.ndim == 3: - layer.reshape_dims = (input.shape[1], input.shape[2], 1) # NCH -> NCHW - elif input.ndim == 5: - layer.reshape_dims = (input.shape[1], input.shape[2], input.shape[3] * input.shape[4]) # NCHWD -> NCHW - - input_trt = layer.get_output(0) - - layer = 
ctx.network.add_scale(input_trt, trt.ScaleMode.CHANNEL, offset, scale, power) - - if input.ndim != 4: - - layer = ctx.network.add_shuffle(layer.get_output(0)) - layer.reshape_dims = tuple(output.shape[1:]) + result_trt = _add_scale_1d2d3d(ctx.network, input._trt, trt.ScaleMode.CHANNEL, offset, scale, power) - output._trt = layer.get_output(0) + output._trt = result_trt # CASE 2 - USING INPUT STATS else: - raise NotImplementedError + eps_np = np.array([eps], dtype=np.float32) + keep_dims = True + reduce_axes = torch_dim_to_trt_axes(tuple(range(2, input.ndim))) + + # compute mean over spatial + mean_trt = ctx.network.add_reduce(input._trt, trt.ReduceOperation.AVG, reduce_axes, keep_dims).get_output(0) + + # compute variance over spatial (include eps, to reduce layer count) + delta_trt = ctx.network.add_elementwise(input._trt, mean_trt, trt.ElementWiseOperation.SUB).get_output(0) + var_trt = ctx.network.add_scale(delta_trt, trt.ScaleMode.UNIFORM, np.zeros_like(eps_np), np.ones_like(eps_np), 2 * np.ones_like(eps_np)).get_output(0) + var_trt = ctx.network.add_reduce(var_trt, trt.ReduceOperation.AVG, reduce_axes, keep_dims).get_output(0) + # compute sqrt(var + eps) + var_trt = ctx.network.add_scale(var_trt, trt.ScaleMode.UNIFORM, eps_np, np.ones_like(eps_np), np.ones_like(eps_np)).get_output(0) + var_trt = ctx.network.add_unary(var_trt, trt.UnaryOperation.SQRT).get_output(0) + + # compute final result + result_trt = ctx.network.add_elementwise(delta_trt, var_trt, trt.ElementWiseOperation.DIV).get_output(0) + + # compute affine (if applicable) + if weight is not None: + + weight_np = weight.detach().cpu().numpy() + bias_np = bias.detach().cpu().numpy() + + result_trt = _add_scale_1d2d3d(ctx.network, result_trt, trt.ScaleMode.CHANNEL, bias_np, weight_np, np.ones_like(bias_np)) + output._trt = result_trt + + +# STATIC + @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3)]) -def test_instance_norm_1d_track_stats(): +def test_instance_norm_1d_static(): return torch.nn.InstanceNorm1d(10, track_running_stats=True) @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3)]) -def test_instance_norm_2d_track_stats(): +def test_instance_norm_2d_static(): return torch.nn.InstanceNorm2d(10, track_running_stats=True) @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3, 3)]) -def test_instance_norm_3d_track_stats(): +def test_instance_norm_3d_static(): return torch.nn.InstanceNorm3d(10, track_running_stats=True) @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3)]) -def test_instance_norm_1d_track_stats_affine(): +def test_instance_norm_1d_static_affine(): return torch.nn.InstanceNorm1d(10, affine=True, track_running_stats=True) @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3)]) -def test_instance_norm_2d_track_stats_affine(): +def test_instance_norm_2d_static_affine(): return torch.nn.InstanceNorm2d(10, affine=True, track_running_stats=True) @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3, 3)]) -def test_instance_norm_3d_track_stats_affine(): - return torch.nn.InstanceNorm3d(10, affine=True, track_running_stats=True) \ No newline at end of file +def test_instance_norm_3d_static_affine(): + return torch.nn.InstanceNorm3d(10, affine=True, track_running_stats=True) + +# DYNAMIC + +# @TODO(jwelsh): 1D dynamic test failing +# @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3)]) +# def test_instance_norm_1d_dynamic(): +# return torch.nn.InstanceNorm1d(10, track_running_stats=False) + + 
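+# NOTE: "dynamic" below means track_running_stats=False, so functional
+# instance_norm runs with use_input_stats=True and CASE 2 above computes
+#     y = (x - mean(x)) / sqrt(var(x) + eps)
+# per channel over the spatial axes, built from TensorRT reduce, scale and
+# elementwise layers rather than a single scale layer with folded statistics.
+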
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3)]) +def test_instance_norm_2d_dynamic(): + return torch.nn.InstanceNorm2d(10, track_running_stats=False) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3, 3)]) +def test_instance_norm_3d_dynamic(): + return torch.nn.InstanceNorm3d(10, track_running_stats=False) + + +# @TODO(jwelsh): 1D dynamic test failing +# @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3)]) +# def test_instance_norm_1d_dynamic_affine(): +# return torch.nn.InstanceNorm1d(10, affine=True, track_running_stats=False) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3)]) +def test_instance_norm_2d_dynamic_affine(): + return torch.nn.InstanceNorm2d(10, affine=True, track_running_stats=False) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3, 3)]) +def test_instance_norm_3d_dynamic_affine(): + return torch.nn.InstanceNorm3d(10, affine=True, track_running_stats=False) \ No newline at end of file From 67ab729e6013bd3e3d66ce2a0ebd710eceef14a0 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 12 Dec 2019 16:30:05 -0800 Subject: [PATCH 250/355] optimize instance norm by merging sqrt inside scale layer --- torch2trt/converters/instance_norm.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/torch2trt/converters/instance_norm.py b/torch2trt/converters/instance_norm.py index e518236b..0e8c9a13 100644 --- a/torch2trt/converters/instance_norm.py +++ b/torch2trt/converters/instance_norm.py @@ -69,8 +69,7 @@ def convert_instance_norm(ctx): var_trt = ctx.network.add_reduce(var_trt, trt.ReduceOperation.AVG, reduce_axes, keep_dims).get_output(0) # compute sqrt(var + eps) - var_trt = ctx.network.add_scale(var_trt, trt.ScaleMode.UNIFORM, eps_np, np.ones_like(eps_np), np.ones_like(eps_np)).get_output(0) - var_trt = ctx.network.add_unary(var_trt, trt.UnaryOperation.SQRT).get_output(0) + var_trt = ctx.network.add_scale(var_trt, trt.ScaleMode.UNIFORM, eps_np, np.ones_like(eps_np), 0.5 * np.ones_like(eps_np)).get_output(0) # compute final result result_trt = ctx.network.add_elementwise(delta_trt, var_trt, trt.ElementWiseOperation.DIV).get_output(0) From c37480762ce2e7e1ba4d11cf1ba229e9595c47cf Mon Sep 17 00:00:00 2001 From: John Welsh Date: Fri, 13 Dec 2019 18:56:42 -0800 Subject: [PATCH 251/355] added converter / tests for torch.Tensor.__getitem__ --- torch2trt/converters/__init__.py | 1 + torch2trt/converters/getitem.py | 155 +++++++++++++++++++++++++++++++ 2 files changed, 156 insertions(+) create mode 100644 torch2trt/converters/getitem.py diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index 1e155093..6b2b002e 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -18,6 +18,7 @@ from .Conv1d import * from .Conv2d import * from .ConvTranspose2d import * +from .getitem import * from .identity import * from .Identity import * from .instance_norm import * diff --git a/torch2trt/converters/getitem.py b/torch2trt/converters/getitem.py new file mode 100644 index 00000000..88877dea --- /dev/null +++ b/torch2trt/converters/getitem.py @@ -0,0 +1,155 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +def slice_to_trt(dim_size, dim_slice): + + start = 0 if dim_slice.start is None else dim_slice.start + stop = dim_size if dim_slice.stop is None else dim_slice.stop + stride = 1 if dim_slice.step is None else dim_slice.step + + size = (stop - start - 1) // stride + 1 + + return 
start, size, stride + + +def num_slice_types(slices): + num_slice = 0 + for s in slices: + if isinstance(s, slice) or isinstance(s, int): + num_slice += 1 + return num_slice + + +@tensorrt_converter('torch.Tensor.__getitem__') +def convert_tensor_getitem(ctx): + input = ctx.method_args[0] + slices = ctx.method_args[1] + output = ctx.method_return + + input_trt = input._trt + + # Step 1 - Replace ellipsis with expanded slices + + num_ellipsis = input.ndim - num_slice_types(slices) + + new_slices = [] + for s in slices: + + if s == Ellipsis: + while num_ellipsis > 0: + new_slices.append(slice(None, None, None)) + num_ellipsis -= 1 + elif isinstance(s, slice): + new_slices.append(s) + elif s is None: + new_slices.append(None) + elif isinstance(s, int): + new_slices.append(s) + + # fill missing slices at end + while num_slice_types(new_slices) < len(input.shape): + new_slices.append(slice(None, None, None)) + + # Step 2 - Remove batch from slices (TRT from this point) + + slices = tuple(new_slices[1:]) # remove batch + + + # Step 3 - Add slice layer (will currently ignore 'None' slices) + + starts = [] + sizes = [] + strides = [] + + input_dim = 0 + for s in slices: + + if input_dim >= len(input_trt.shape): + break + + input_size = int(input_trt.shape[input_dim]) + + if isinstance(s, slice): + start, size, stride = slice_to_trt(input_size, s) + starts.append(start) + sizes.append(size) + strides.append(stride) + input_dim += 1 + + elif isinstance(s, int): + starts.append(s) + sizes.append(1) + strides.append(1) + input_dim += 1 + + output_trt = ctx.network.add_slice(input_trt, starts, sizes, strides).get_output(0) + + # Step 4 - Add shuffle layer to insert dimensions for 'None' slices and remove dimensions for 'int' slices + + num_non_slice = len([s for s in slices if not isinstance(s, slice)]) + if num_non_slice > 0: + layer = ctx.network.add_shuffle(output_trt) + layer.reshape_dims = tuple(output.shape[1:]) # exclude batch + output_trt = layer.get_output(0) + + output._trt = output_trt + + +class LambdaModule(torch.nn.Module): + def __init__(self, fn): + super(LambdaModule, self).__init__() + self.fn = fn + + def forward(self, x): + return self.fn(x) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)]) +def test_tensor_getitem_1d_int(): + return LambdaModule(lambda x: x[:, 0]) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 4, 3)]) +def test_tensor_getitem_2d_int(): + return LambdaModule(lambda x: x[:, 0]) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 4, 3)]) +def test_tensor_getitem_2d_strided(): + return LambdaModule(lambda x: x[:, ::2]) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 4, 3)]) +def test_tensor_getitem_2d_strided_offset(): + return LambdaModule(lambda x: x[:, 1::2]) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 4, 3)]) +def test_tensor_getitem_2d_strided_range(): + return LambdaModule(lambda x: x[:, 1:3:2]) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 4, 3)]) +def test_tensor_getitem_2d_insert_dim(): + return LambdaModule(lambda x: x[:, None]) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 4, 3)]) +def test_tensor_getitem_2d_insert_dim_ellipsis(): + return LambdaModule(lambda x: x[:, None, ...]) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 4, 3)]) +def test_tensor_getitem_2d_append_dim(): + return LambdaModule(lambda x: x[:, ..., None]) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 4, 3)]) +def 
test_tensor_getitem_2d_append_2dim(): + return LambdaModule(lambda x: x[:, ..., None, None]) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 4, 3)]) +def test_tensor_getitem_2d_weird_combo(): + return LambdaModule(lambda x: x[:, 0:3:4, None, None, 1, ...]) \ No newline at end of file From 1a26b543926a42578a1e08c04feac2fa6eb30527 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Sat, 14 Dec 2019 13:49:59 -0800 Subject: [PATCH 252/355] fixed Linear for N, *, H case --- torch2trt/converters/Linear.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/torch2trt/converters/Linear.py b/torch2trt/converters/Linear.py index ea8cdd44..6b896eed 100644 --- a/torch2trt/converters/Linear.py +++ b/torch2trt/converters/Linear.py @@ -9,9 +9,9 @@ def convert_Linear(ctx): input_trt = trt_(ctx.network, input) output = ctx.method_return - # reshape to Nx1x1 + # reshape to ...xNx1x1 layer = ctx.network.add_shuffle(input_trt) - layer.reshape_dims = (-1, 1, 1) + layer.reshape_dims = tuple(input_trt.shape) + (1, 1) bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype)) if module.bias is not None: @@ -26,16 +26,20 @@ def convert_Linear(ctx): # reshape back to N layer = ctx.network.add_shuffle(layer.get_output(0)) - layer.reshape_dims = (-1,) + layer.reshape_dims = tuple(output.shape[1:]) output._trt = layer.get_output(0) @add_module_test(torch.float32, torch.device('cuda'), [(1, 10)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 10)]) def test_Linear_basic(): return torch.nn.Linear(10, 5) @add_module_test(torch.float32, torch.device('cuda'), [(1, 10)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 10)]) def test_Linear_no_bias(): return torch.nn.Linear(10, 5, bias=False) \ No newline at end of file From 8142c6f196847f9fbebc6d5045797a9c08cc3f3f Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 17 Dec 2019 11:32:09 -0800 Subject: [PATCH 253/355] fixed permute for list args --- torch2trt/converters/permute.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/torch2trt/converters/permute.py b/torch2trt/converters/permute.py index 110c4e6b..c7baa0b8 100644 --- a/torch2trt/converters/permute.py +++ b/torch2trt/converters/permute.py @@ -9,7 +9,11 @@ def convert_permute(ctx): output = ctx.method_return # permutation -1 because TRT does not include batch dim - permutation = ctx.method_args[1:] + if isinstance(ctx.method_args[1], int): + permutation = tuple(ctx.method_args[1:]) # handle permute(a, b, c) + else: + permutation = tuple(ctx.method_args[1]) # handle permute([a, b, c]) + assert(permutation[0] == 0) # cannot move batch dim trt_permutation = tuple([p - 1 for p in permutation])[1:] @@ -41,7 +45,14 @@ def test_permute_2d_0312(): def test_permute_3d_01234(): return Permute(0, 1, 2, 3, 4) - @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5, 6)]) def test_permute_3d_04132(): return Permute(0, 4, 1, 3, 2) + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5, 6)]) +def test_permute_list(): + return Permute([0, 4, 1, 3, 2]) + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5, 6)]) +def test_permute_tuple(): + return Permute((0, 4, 1, 3, 2)) \ No newline at end of file From 72f6ec2b3478d4c8590145b1ee0a12d5840fe581 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 17 Dec 2019 13:39:45 -0800 Subject: [PATCH 
254/355] added support for adaptive_max_pool2d using regular pooling --- torch2trt/converters/__init__.py | 1 + torch2trt/converters/adaptive_max_pool2d.py | 36 +++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 torch2trt/converters/adaptive_max_pool2d.py diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index 6b2b002e..4850e98d 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -6,6 +6,7 @@ from .activation import * from .adaptive_avg_pool2d import * +from .adaptive_max_pool2d import * from .AdaptiveAvgPool2d import * from .add import * from .avg_pool2d import * diff --git a/torch2trt/converters/adaptive_max_pool2d.py b/torch2trt/converters/adaptive_max_pool2d.py new file mode 100644 index 00000000..78956ba2 --- /dev/null +++ b/torch2trt/converters/adaptive_max_pool2d.py @@ -0,0 +1,36 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +@tensorrt_converter('torch.nn.functional.adaptive_max_pool2d') +def convert_adaptive_max_pool2d(ctx): + input = ctx.method_args[0] + output = ctx.method_return + + output_size = ctx.method_args[1] + if isinstance(output_size, int): + output_size = (output_size, ) * 2 + + stride = (input._trt.shape[-2] // output_size[-2], input._trt.shape[-1] // output_size[-1]) + + kernel_size = stride + layer = ctx.network.add_pooling( + input=input._trt, type=trt.PoolingType.MAX, window_size=kernel_size) + layer.stride = stride + + output._trt = layer.get_output(0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) +def test_adaptive_max_pool2d_1x1(): + return torch.nn.AdaptiveMaxPool2d((1, 1)) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) +def test_adaptive_max_pool2d_2x2(): + return torch.nn.AdaptiveMaxPool2d((2, 2)) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) +def test_adaptive_max_pool2d_3x3(): + return torch.nn.AdaptiveMaxPool2d((3, 3)) From c6d047f4e0b015273f8d276f9c4eba3b37be5560 Mon Sep 17 00:00:00 2001 From: SnowGushiGit Date: Mon, 6 Jan 2020 18:39:10 +0900 Subject: [PATCH 255/355] update clamp --- .idea/vcs.xml | 6 +++ torch2trt/converters/clamp.py | 82 ++++++++++++++++++++++++++++++++--- 2 files changed, 81 insertions(+), 7 deletions(-) create mode 100644 .idea/vcs.xml diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 00000000..94a25f7f --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/torch2trt/converters/clamp.py b/torch2trt/converters/clamp.py index d402b272..f326a276 100644 --- a/torch2trt/converters/clamp.py +++ b/torch2trt/converters/clamp.py @@ -85,18 +85,28 @@ def test_tensor_clamp_max(): # CLAMP - @tensorrt_converter('torch.clamp') @tensorrt_converter('torch.Tensor.clamp') def convert_clamp(ctx): input = ctx.method_args[0] - min_val = ctx.method_args[1] - max_val = ctx.method_args[2] output = ctx.method_return - - layer = __add_clamp(ctx.network, input._trt, min_val, trt.ElementWiseOperation.MAX) - layer = __add_clamp(ctx.network, layer.get_output(0), max_val, trt.ElementWiseOperation.MIN) + if "min" in ctx.method_kwargs and "max" in ctx.method_kwargs: + min_val = ctx.method_kwargs["min"] + max_val = ctx.method_kwargs["max"] + layer = __add_clamp(ctx.network, input._trt, min_val, trt.ElementWiseOperation.MAX) + layer = __add_clamp(ctx.network, layer.get_output(0), max_val, trt.ElementWiseOperation.MIN) + elif "min" in ctx.method_kwargs: + min_val = 
ctx.method_kwargs["min"] + layer = __add_clamp(ctx.network, input._trt, min_val, trt.ElementWiseOperation.MAX) + elif "max" in ctx.method_kwargs: + max_val = ctx.method_kwargs["max"] + layer = __add_clamp(ctx.network, input._trt, max_val, trt.ElementWiseOperation.MIN) + else: + min_val = ctx.method_args[1] + max_val = ctx.method_args[2] + layer = __add_clamp(ctx.network, input._trt, min_val, trt.ElementWiseOperation.MAX) + layer = __add_clamp(ctx.network, layer.get_output(0), max_val, trt.ElementWiseOperation.MIN) output._trt = layer.get_output(0) @@ -118,4 +128,62 @@ def forward(self, x): @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) def test_tensor_clamp(): - return TensorClamp() \ No newline at end of file + return TensorClamp() + + +class TorchClampOptionMax(torch.nn.Module): + def forward(self, x): + return torch.clamp(x, max=0.1) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) +def test_torch_clamp_option_max(): + return TorchClampOptionMax() + +class TorchClampOptionMin(torch.nn.Module): + def forward(self, x): + return torch.clamp(x, min=-0.1) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) +def test_torch_clamp_option_min(): + return TorchClampOptionMin() + + +class TorchClampOptionMaxMin(torch.nn.Module): + def forward(self, x): + return torch.clamp(x, min=-0.1, max=0.1) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) +def test_torch_clamp_option_max_min(): + return TorchClampOptionMaxMin() + + +class TensorClampOptionMax(torch.nn.Module): + def forward(self, x): + return x.clamp(max=0.1) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) +def test_tensor_clamp_option_max(): + return TensorClampOptionMax() + +class TensorClampOptionMin(torch.nn.Module): + def forward(self, x): + return x.clamp(min=-0.1) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) +def test_tensor_clamp_option_min(): + return TensorClampOptionMin() + + +class TensorClampOptionMaxMin(torch.nn.Module): + def forward(self, x): + return x.clamp(min=-0.1, max=0.1) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) +def test_tensor_clamp_option_max_min(): + return TensorClampOptionMaxMin() \ No newline at end of file From 1756aa927a63f45db288e95e34ee67505079e2c5 Mon Sep 17 00:00:00 2001 From: SnowGushiGit Date: Mon, 6 Jan 2020 18:49:13 +0900 Subject: [PATCH 256/355] rm idea --- .idea/vcs.xml | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 .idea/vcs.xml diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index 94a25f7f..00000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file From 6be6da4778c6f71647a878084a69a5bd88633ca2 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Fri, 10 Apr 2020 12:18:43 -0700 Subject: [PATCH 257/355] plugin setup.py --- setup.py | 146 ++++++------------------------ torch2trt/plugins/interpolate.cpp | 45 +++++++++ 2 files changed, 73 insertions(+), 118 deletions(-) create mode 100644 torch2trt/plugins/interpolate.cpp diff --git a/setup.py b/setup.py index 50df74ea..4442658b 100644 --- a/setup.py +++ b/setup.py @@ -1,127 +1,37 @@ -import os -import glob -import shutil from setuptools import setup, find_packages -from setuptools.command.install import install -from setuptools.command.develop import develop -from distutils.cmd import Command -from build import build - -package_data = {} - -plugins_user_options = [ - 
('plugins', None, 'Build plugins'), - ('cuda-dir=', None, 'Location of CUDA (if not default location)'), - ('torch-dir=', None, 'Location of PyTorch (if not default location)'), - ('trt-inc-dir=', None, 'Location of TensorRT include files (if not default location)'), - ('trt-lib-dir=', None, 'Location of TensorRT libraries (if not default location)'), +from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CppExtension + +def trt_inc_dir(): + return "/usr/include/aarch64-linux-gnu" + +def trt_lib_dir(): + return "/usr/lib/aarch64-linux-gnu" + + +ext_modules = [ + CppExtension( + name='plugins', + sources=[ + 'torch2trt/plugins/interpolate.cpp' + ], + include_dirs=[ + trt_inc_dir() + ], + library_dirs=[ + trt_lib_dir() + ], + libraries=[ + 'nvinfer' + ] + ) ] - -def initialize_plugins_options(cmd_obj): - cmd_obj.plugins = False - cmd_obj.cuda_dir = None - cmd_obj.torch_dir = None - cmd_obj.trt_inc_dir = None - cmd_obj.trt_lib_dir = None - - -def run_plugins_compilation(cmd_obj): - if cmd_obj.plugins: - build_args = {} - if cmd_obj.cuda_dir: - build_args['cuda_dir'] = cmd_obj.cuda_dir - if cmd_obj.torch_dir: - build_args['torch_dir'] = cmd_obj.torch_dir - if cmd_obj.trt_inc_dir: - build_args['trt_inc_dir'] = cmd_obj.trt_inc_dir - if cmd_obj.trt_lib_dir: - build_args['trt_lib_dir'] = cmd_obj.trt_lib_dir - - print('Building in plugin support') - build(**build_args) - package_data['torch2trt'] = ['libtorch2trt.so'] - - -class DevelopCommand(develop): - description = "Builds the package and symlinks it into the PYTHONPATH" - user_options = develop.user_options + plugins_user_options - - def initialize_options(self): - develop.initialize_options(self) - initialize_plugins_options(self) - - def finalize_options(self): - develop.finalize_options(self) - - def run(self): - run_plugins_compilation(self) - develop.run(self) - - -class InstallCommand(install): - description = "Builds the package" - user_options = install.user_options + plugins_user_options - - def initialize_options(self): - install.initialize_options(self) - initialize_plugins_options(self) - - def finalize_options(self): - install.finalize_options(self) - - def run(self): - run_plugins_compilation(self) - install.run(self) - - -class CleanCommand(Command): - """Custom clean command to tidy up the project root.""" - PY_CLEAN_FILES = ['./build', './dist', './__pycache__', './*.pyc', './*.tgz', './*.egg-info'] - description = "Command to tidy up the project root" - user_options = [] - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - root_dir = os.path.dirname(os.path.realpath(__file__)) - for path_spec in self.PY_CLEAN_FILES: - # Make paths absolute and relative to this path - abs_paths = glob.glob(os.path.normpath(os.path.join(root_dir, path_spec))) - for path in [str(p) for p in abs_paths]: - if not path.startswith(root_dir): - # Die if path in CLEAN_FILES is absolute + outside this directory - raise ValueError("%s is not a path inside %s" % (path, root_dir)) - print('Removing %s' % os.path.relpath(path)) - shutil.rmtree(path) - - cmd_list = { - "Removing generated protobuf cc files": "find . -name '*.pb.cc' -print0 | xargs -0 rm -f;", - "Removing generated protobuf h files": "find . -name '*.pb.h' -print0 | xargs -0 rm -f;", - "Removing generated protobuf py files": "find . -name '*_pb2.py' -print0 | xargs -0 rm -f;", - "Removing generated ninja files": "find . -name '*.ninja*' -print0 | xargs -0 rm -f;", - "Removing generated o files": "find . 
-name '*.o' -print0 | xargs -0 rm -f;", - "Removing generated so files": "find . -name '*.so' -print0 | xargs -0 rm -f;", - } - - for cmd, script in cmd_list.items(): - print("{}".format(cmd)) - os.system(script) - - setup( name='torch2trt', version='0.0.3', description='An easy to use PyTorch to TensorRT converter', - cmdclass={ - 'install': InstallCommand, - 'clean': CleanCommand, - 'develop': DevelopCommand, - }, packages=find_packages(), - package_data=package_data + ext_package='torch2trt', + ext_modules=ext_modules, + cmdclass={'build_ext': BuildExtension} ) diff --git a/torch2trt/plugins/interpolate.cpp b/torch2trt/plugins/interpolate.cpp new file mode 100644 index 00000000..fee3fc10 --- /dev/null +++ b/torch2trt/plugins/interpolate.cpp @@ -0,0 +1,45 @@ +#include +#include +#include +#include +#include + + +class DummyPlugin { +private: + torch::jit::script::Module params_container; +public: + InterpolatePlugin() {} + + void put_serialized(const char *data, size_t length) { + std::string data_str(data, length); + std::istringstream data_stream(data_str); + params_container = torch::jit::load(data_stream); + + for (auto a : params_container.named_attributes()) { + if (a.name == "d") { + std::cout << a.name << ': ' << a.value << std::endl; + a.value = 99; + std::cout << a.name << ': ' << a.value << std::endl; + } + } + } + + size_t get_serialized_size() { + auto str = get_serialized(); + return str.size(); + } + + std::string get_serialized() { + std::ostringstream data_str; + params_container.save(data_str); + return data_str.str(); + } +}; + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + py::class_(m, "InterpolatePlugin") + .def(py::init<>()) + .def("put_serialized", &InterpolatePlugin::put_serialized) + .def("get_serialized_size", &InterpolatePlugin::get_serialized_size); +} \ No newline at end of file From 11420799ec7a6f66b52a21a195a46ba0272b9fd8 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 28 Apr 2020 12:52:04 -0700 Subject: [PATCH 258/355] asdf --- setup.py | 9 +- torch2trt/__init__.py | 26 +- .../converters/interpolate/interpolate.py | 8 +- torch2trt/plugins/interpolate.cpp | 312 ++++++++++++++++-- 4 files changed, 311 insertions(+), 44 deletions(-) diff --git a/setup.py b/setup.py index 4442658b..a77665b2 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ def trt_lib_dir(): ext_modules = [ - CppExtension( + CUDAExtension( name='plugins', sources=[ 'torch2trt/plugins/interpolate.cpp' @@ -22,10 +22,15 @@ def trt_lib_dir(): ], libraries=[ 'nvinfer' - ] + ], + extra_compile_args={ + 'cxx': ['], + 'nvcc': [] + } ) ] + setup( name='torch2trt', version='0.0.3', diff --git a/torch2trt/__init__.py b/torch2trt/__init__.py index 625a19ae..ade35f40 100644 --- a/torch2trt/__init__.py +++ b/torch2trt/__init__.py @@ -3,19 +3,19 @@ import tensorrt as trt -def load_plugins(): - import os - import ctypes - ctypes.CDLL(os.path.join(os.path.dirname(__file__), 'libtorch2trt.so')) +# def load_plugins(): +# import os +# import ctypes +# ctypes.CDLL(os.path.join(os.path.dirname(__file__), 'libtorch2trt.so')) - registry = trt.get_plugin_registry() - torch2trt_creators = [c for c in registry.plugin_creator_list if c.plugin_namespace == 'torch2trt'] - for c in torch2trt_creators: - registry.register_creator(c, 'torch2trt') +# registry = trt.get_plugin_registry() +# torch2trt_creators = [c for c in registry.plugin_creator_list if c.plugin_namespace == 'torch2trt'] +# for c in torch2trt_creators: +# registry.register_creator(c, 'torch2trt') -try: - load_plugins() - PLUGINS_LOADED = True -except 
OSError: - PLUGINS_LOADED = False +# try: +# load_plugins() +# PLUGINS_LOADED = True +# except OSError: +# PLUGINS_LOADED = False diff --git a/torch2trt/converters/interpolate/interpolate.py b/torch2trt/converters/interpolate/interpolate.py index 996aea77..75674d5e 100644 --- a/torch2trt/converters/interpolate/interpolate.py +++ b/torch2trt/converters/interpolate/interpolate.py @@ -2,15 +2,16 @@ import torch.nn.functional as F from torch2trt.torch2trt import * from torch2trt.module_test import add_module_test -from .interpolate_pb2 import interpolate_Message +from torch2trt.plugins import InterpolatePlugin +# from .interpolate_pb2 import interpolate_Message import torch.nn as nn def get_interpolate_plugin(size, mode, align_corners): PLUGIN_NAME = 'interpolate' registry = trt.get_plugin_registry() creator = [c for c in registry.plugin_creator_list if c.name == PLUGIN_NAME and c.plugin_namespace == 'torch2trt'][0] - message = interpolate_Message(size=size, mode=mode, align_corners=align_corners) - return creator.deserialize_plugin(PLUGIN_NAME, message.SerializeToString()) + torch2trt_plugin = InterpolatePlugin(size=size, mode=mode, align_corners=align_corners) + return creator.deserialize_plugin(PLUGIN_NAME, torch2trt_plugin.serializeToString()) @tensorrt_converter('torch.nn.functional.interpolate') @@ -33,6 +34,7 @@ def convert_interpolate(ctx): size = list(output.shape[2:]) plugin = get_interpolate_plugin(size=size, mode=mode, align_corners=align_corners) + layer = ctx.network.add_plugin_v2([input_trt], plugin) diff --git a/torch2trt/plugins/interpolate.cpp b/torch2trt/plugins/interpolate.cpp index fee3fc10..621c334a 100644 --- a/torch2trt/plugins/interpolate.cpp +++ b/torch2trt/plugins/interpolate.cpp @@ -3,43 +3,303 @@ #include #include #include +#include +#include +#include +#include +#include +using namespace nvinfer1; -class DummyPlugin { +namespace torch2trt { + + +class InterpolatePlugin : public IPluginV2 { private: - torch::jit::script::Module params_container; + + // configured by class + at::TensorOptions tensor_options; + std::vector input_sizes; + std::vector output_sizes; + DataType dtype; + + // configured by user + std::vector size; + std::string mode; + bool align_corners; + public: - InterpolatePlugin() {} - void put_serialized(const char *data, size_t length) { - std::string data_str(data, length); - std::istringstream data_stream(data_str); - params_container = torch::jit::load(data_stream); - - for (auto a : params_container.named_attributes()) { - if (a.name == "d") { - std::cout << a.name << ': ' << a.value << std::endl; - a.value = 99; - std::cout << a.name << ': ' << a.value << std::endl; - } - } + // create from arguments + InterpolatePlugin(std::vector size, std::string mode, bool align_corners) : + size(size), mode(mode), align_corners(align_corners) + {} + + InterpolatePlugin(const char *data, size_t length) : InterpolatePlugin(std::string(data, length)) {} + + // create from serialized data + InterpolatePlugin(const std::string &data) { + deserializeFromString(data); + } + + void testPackContainer() { + packContainer(); + } + + // packs data into container for serialization + torch::jit::script::Module packContainer() const { + + torch::jit::script::Module container; + + auto i_size = torch::IValue(size); + auto i_mode = torch::IValue(mode); + auto i_align_corners = torch::IValue(align_corners); + auto i_dtype = torch::IValue((int) dtype); + auto i_input_sizes = torch::IValue(input_sizes); + auto i_output_sizes = torch::IValue(output_sizes); + + // set by user 
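+        // NOTE: packContainer() is a stub in this commit: the
+        // register_attribute calls below are commented out and nothing is
+        // returned, so serializeToString() further down still falls back to
+        // a placeholder string instead of real serialized state.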
+// container.register_attribute("size", i_size.type(), i_size); +// container.register_attribute("mode", i_mode.type(), i_mode); +// container.register_attribute("align_corners", i_align_corners.type(), i_align_corners); + +// // configured by TRT +// container.register_attribute("dtype", i_dtype.type(), i_dtype); +// container.register_attribute("input_sizes", i_input_sizes.type(), i_input_sizes); +// container.register_attribute("output_sizes", i_output_sizes.type(), i_output_sizes); + } + + // unpacks data from container into class attributes + void unpackContainer(const torch::jit::script::Module& container) { + for (auto a : container.named_attributes()) { + if (a.name == "size") { + // TODO(jwelsh) toIntListRef().vec() is removed 1.5+, use toIntVector() instead... need to handle with preproc + size = a.value.toIntListRef().vec(); + } else if (a.name == "mode") { + mode = a.value.toStringRef(); + } else if (a.name == "align_corners") { + align_corners = a.value.toBool(); + } else if (a.name == "dtype") { + dtype = (DataType) a.value.toInt(); + } else if (a.name == "input_sizes") { + input_sizes = a.value.toIntListRef().vec(); + } else if (a.name == "output_sizes") { + output_sizes = a.value.toIntListRef().vec(); + } + } + } + + void deserializeFromString(const std::string &data) { +// char *data_ptr = data.c_str(); +// size_t size = data.size(); + torch::jit::script::Module container; + std::istringstream data_stream(data); + container = torch::jit::load(data_stream); + unpackContainer(container); + } + + std::string serializeToString() const { +// torch::jit::script::Module container = packContainer(); +// std::ostringstream data_str; +// container.save(data_str); +// return data_str.str(); + return "hello"; + } + + const char* getPluginType() const override { + return "interpolate"; + }; + + const char* getPluginVersion() const override { + return "1"; + } + + int getNbOutputs() const override { + return 1; + } + + Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override { + Dims dims; + dims.nbDims = inputs->nbDims; + + dims.d[0] = inputs->d[0]; + for (int i = 0; i < size.size(); i++) { + dims.d[i + 1] = size[i]; + } + + return dims; + } + + bool supportsFormat(DataType type, PluginFormat format) const override { + if (format != PluginFormat::kNCHW) { + return false; } + if (type == DataType::kINT32 || type == DataType::kINT8) { + return false; + } + return true; + } + + void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, + int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) override { - size_t get_serialized_size() { - auto str = get_serialized(); - return str.size(); + // set data type + if (type == DataType::kFLOAT) { + tensor_options = tensor_options.dtype(c10::kFloat); + dtype = type; + } else if (type == DataType::kHALF) { + tensor_options = tensor_options.dtype(c10::kHalf); + dtype = type; + } + + // set input sizes + input_sizes.resize(inputDims[0].nbDims); + for (int i = 0; i < inputDims[0].nbDims; i++) { + input_sizes[i] = inputDims[0].d[i]; } + + // set output sizes + output_sizes.resize(outputDims[0].nbDims); + for (int i = 0; i < outputDims[0].nbDims; i++) { + output_sizes[i] = outputDims[0].d[i]; + } + } + + int initialize() override { + // set device + tensor_options = tensor_options.device(c10::kCUDA); + + // set data type + if (dtype == DataType::kFLOAT) { + tensor_options = tensor_options.dtype(c10::kFloat); + } else if (dtype == DataType::kHALF) { + tensor_options = 
tensor_options.dtype(c10::kHalf); + } + +// input_sizes.resize(message.input_size_size()); +// output_sizes.resize(message.output_size_size()); - std::string get_serialized() { - std::ostringstream data_str; - params_container.save(data_str); - return data_str.str(); +// for (int i = 0; i < message.input_size_size(); i++) { +// input_sizes[i] = message.input_size(i); +// } +// for (int i = 0; i < message.output_size_size(); i++) { +// output_sizes[i] = message.output_size(i); +// } + + return 0; + } + + void terminate() override {} + + size_t getWorkspaceSize(int maxBatchSize) const override { return 0; } + + int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream) override { + // get input / output dimensions + std::vector batch_input_sizes = input_sizes; + std::vector batch_output_sizes = output_sizes; + batch_input_sizes.insert(batch_input_sizes.begin(), batchSize); + batch_output_sizes.insert(batch_output_sizes.begin(), batchSize); + + // create tensor wrappers + at::Tensor input = at::from_blob((void*) inputs[0], batch_input_sizes, [](void*){}, tensor_options); + at::Tensor output = at::from_blob(outputs[0], batch_output_sizes, [](void*){}, tensor_options); + + // create new torch cuda stream + at::cuda::CUDAStream torch_stream = at::cuda::getStreamFromPool(); + at::cuda::CUDAStreamGuard torch_guard(torch_stream); + + // capture current work on tensorrt cuda stream + cudaEvent_t event; + cudaEventCreate(&event); + cudaEventRecord(event, stream); + + // make torch cuda stream wait on tensorrt work + cudaStreamWaitEvent(torch_stream.stream(), event, 0); + + // enqueue work + if (mode == "bilinear") { + at::upsample_bilinear2d_out(output, input, {size[0], size[1]}, align_corners); + } else if (mode == "nearest") { + at::upsample_nearest2d_out(output, input, {size[0], size[1]}); + } else if (mode == "area") { + at::adaptive_avg_pool2d_out(output, input, {size[0], size[1]}); + } else if (mode == "bicubic") { + at::upsample_bicubic2d_out(output, input, {size[0], size[1]}, align_corners); } + + // capture event on enqueued stream + cudaEvent_t torch_event; + cudaEventCreate(&torch_event); + cudaEventRecord(torch_event, torch_stream.stream()); + + cudaStreamWaitEvent(stream, torch_event, 0); + + cudaEventDestroy(event); + cudaEventDestroy(torch_event); + + return 0; + } + + size_t getSerializationSize() const override { + return serializeToString().size(); + } + + void serialize(void* buffer) const override { + std::string data = serializeToString(); + size_t size = getSerializationSize(); + data.copy((char *) buffer, size); + } + + void destroy() override {} + + IPluginV2* clone() const override { + return new InterpolatePlugin(size, mode, align_corners); + } + + void setPluginNamespace(const char* pluginNamespace) override {} + + const char *getPluginNamespace() const override { + return "torch2trt"; + } + +}; + +class InterpolatePluginCreator : public IPluginCreator { +public: + InterpolatePluginCreator() {} + + const char *getPluginNamespace() const override { + return "torch2trt"; + } + + const char *getPluginName() const override { + return "interpolate"; + } + + const char *getPluginVersion() const override { + return "1"; + } + + IPluginV2 *deserializePlugin(const char *name, const void *data, size_t length) override { + return new InterpolatePlugin((const char*) data, length); + } + + void setPluginNamespace(const char *N) override {} + const PluginFieldCollection *getFieldNames() override { return nullptr; } + + IPluginV2 
*createPlugin(const char *name, const PluginFieldCollection *fc) override { return nullptr; } + }; +REGISTER_TENSORRT_PLUGIN(InterpolatePluginCreator); + + PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { py::class_(m, "InterpolatePlugin") - .def(py::init<>()) - .def("put_serialized", &InterpolatePlugin::put_serialized) - .def("get_serialized_size", &InterpolatePlugin::get_serialized_size); -} \ No newline at end of file + .def(py::init, std::string, bool>(), py::arg("size"), py::arg("mode"), py::arg("align_corners")) + .def("testPackContainer", &InterpolatePlugin::testPackContainer) + .def("getSerializationSize", &InterpolatePlugin::getSerializationSize) + .def("serializeToString", &InterpolatePlugin::serializeToString); +} + +} // namespace torch2trt \ No newline at end of file From b5bb91da96f1aa41c121ef6786fcf5f71b4fec0c Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 28 Apr 2020 14:04:26 -0700 Subject: [PATCH 259/355] interpolate torch serialization --- setup.py | 2 +- torch2trt/__init__.py | 21 +- torch2trt/converters/__init__.py | 6 +- .../{interpolate => }/interpolate.py | 0 torch2trt/converters/interpolate/__init__.py | 1 - .../converters/interpolate/interpolate.cpp | 210 ------------------ .../converters/interpolate/interpolate.proto | 23 -- torch2trt/plugins/interpolate.cpp | 120 +++++----- 8 files changed, 60 insertions(+), 323 deletions(-) rename torch2trt/converters/{interpolate => }/interpolate.py (100%) delete mode 100644 torch2trt/converters/interpolate/__init__.py delete mode 100644 torch2trt/converters/interpolate/interpolate.cpp delete mode 100644 torch2trt/converters/interpolate/interpolate.proto diff --git a/setup.py b/setup.py index a77665b2..2a2ab770 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ def trt_lib_dir(): 'nvinfer' ], extra_compile_args={ - 'cxx': ['], + 'cxx': [], 'nvcc': [] } ) diff --git a/torch2trt/__init__.py b/torch2trt/__init__.py index ade35f40..ddc1608e 100644 --- a/torch2trt/__init__.py +++ b/torch2trt/__init__.py @@ -3,19 +3,12 @@ import tensorrt as trt -# def load_plugins(): -# import os -# import ctypes -# ctypes.CDLL(os.path.join(os.path.dirname(__file__), 'libtorch2trt.so')) - -# registry = trt.get_plugin_registry() -# torch2trt_creators = [c for c in registry.plugin_creator_list if c.plugin_namespace == 'torch2trt'] -# for c in torch2trt_creators: -# registry.register_creator(c, 'torch2trt') +def load_plugins(): + import torch2trt.plugins + registry = trt.get_plugin_registry() + torch2trt_creators = [c for c in registry.plugin_creator_list if c.plugin_namespace == 'torch2trt'] + for c in torch2trt_creators: + registry.register_creator(c, 'torch2trt') -# try: -# load_plugins() -# PLUGINS_LOADED = True -# except OSError: -# PLUGINS_LOADED = False +load_plugins() \ No newline at end of file diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index 4850e98d..12b53233 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -50,8 +50,6 @@ from .chunk import * from .unary import * +# PLUGINS -try: - from .interpolate import * -except: - pass +from .interpolate import * \ No newline at end of file diff --git a/torch2trt/converters/interpolate/interpolate.py b/torch2trt/converters/interpolate.py similarity index 100% rename from torch2trt/converters/interpolate/interpolate.py rename to torch2trt/converters/interpolate.py diff --git a/torch2trt/converters/interpolate/__init__.py b/torch2trt/converters/interpolate/__init__.py deleted file mode 100644 index ff29a907..00000000 --- 
a/torch2trt/converters/interpolate/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .interpolate import * diff --git a/torch2trt/converters/interpolate/interpolate.cpp b/torch2trt/converters/interpolate/interpolate.cpp deleted file mode 100644 index c35b4fd5..00000000 --- a/torch2trt/converters/interpolate/interpolate.cpp +++ /dev/null @@ -1,210 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "interpolate.pb.h" - - -using namespace nvinfer1; - - -namespace torch2trt -{ - -class interpolate_Plugin : public IPluginV2 { -private: - interpolate_Message message; - at::TensorOptions tensor_options; - std::vector input_sizes; - std::vector output_sizes; - -public: - interpolate_Plugin(interpolate_Message message) : message(message) {} - - const char* getPluginType() const override { - return "interpolate"; - }; - - const char* getPluginVersion() const override { - return "1"; - } - - int getNbOutputs() const override { - return 1; - } - - Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override { - Dims dims; - dims.nbDims = inputs->nbDims; - - dims.d[0] = inputs->d[0]; - for (int i = 0; i < message.size_size(); i++) { - dims.d[i + 1] = message.size(i); - } - - return dims; - } - - bool supportsFormat(DataType type, PluginFormat format) const override { - if (format != PluginFormat::kNCHW) { - return false; - } - if (type == DataType::kINT32 || type == DataType::kINT8) { - return false; - } - return true; - } - - void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, - int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) override { - - // set data type - if (type == DataType::kFLOAT) { - message.set_dtype(DataTypeMessage::kFloat); - } else if (type == DataType::kHALF) { - tensor_options = tensor_options.dtype(c10::kHalf); - message.set_dtype(DataTypeMessage::kHalf); - } - - // set input sizes - for (int i = 0; i < inputDims[0].nbDims; i++) { - message.add_input_size(inputDims[0].d[i]); - } - - // set output sizes - for (int i = 0; i < outputDims[0].nbDims; i++) { - message.add_output_size(outputDims[0].d[i]); - } - } - - int initialize() override { - // set device - tensor_options = tensor_options.device(c10::kCUDA); - - // set data type - if (message.dtype() == DataTypeMessage::kFloat) { - tensor_options = tensor_options.dtype(c10::kFloat); - } else if (message.dtype() == DataTypeMessage::kHalf) { - tensor_options = tensor_options.dtype(c10::kHalf); - } - - input_sizes.resize(message.input_size_size()); - output_sizes.resize(message.output_size_size()); - - for (int i = 0; i < message.input_size_size(); i++) { - input_sizes[i] = message.input_size(i); - } - for (int i = 0; i < message.output_size_size(); i++) { - output_sizes[i] = message.output_size(i); - } - - return 0; - } - - void terminate() override {} - - size_t getWorkspaceSize(int maxBatchSize) const override { return 0; } - - int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream) override { - // get input / output dimensions - std::vector batch_input_sizes = input_sizes; - std::vector batch_output_sizes = output_sizes; - batch_input_sizes.insert(batch_input_sizes.begin(), batchSize); - batch_output_sizes.insert(batch_output_sizes.begin(), batchSize); - - // create tensor wrappers - at::Tensor input = at::from_blob((void*) inputs[0], batch_input_sizes, [](void*){}, tensor_options); - at::Tensor output = at::from_blob(outputs[0], batch_output_sizes, [](void*){}, tensor_options); - - // 
create new torch cuda stream - at::cuda::CUDAStream torch_stream = at::cuda::getStreamFromPool(); - at::cuda::CUDAStreamGuard torch_guard(torch_stream); - - // capture current work on tensorrt cuda stream - cudaEvent_t event; - cudaEventCreate(&event); - cudaEventRecord(event, stream); - - // make torch cuda stream wait on tensorrt work - cudaStreamWaitEvent(torch_stream.stream(), event, 0); - - // enqueue work - if (message.mode() == "bilinear") { - at::upsample_bilinear2d_out(output, input, {message.size(0), message.size(1)}, message.align_corners()); - } else if (message.mode() == "nearest") { - at::upsample_nearest2d_out(output, input, {message.size(0), message.size(1)}); - } else if (message.mode() == "area") { - at::adaptive_avg_pool2d_out(output, input, {message.size(0), message.size(1)}); - } else if (message.mode() == "bicubic") { - at::upsample_bicubic2d_out(output, input, {message.size(0), message.size(1)}, message.align_corners()); - } - - // capture event on enqueued stream - cudaEvent_t torch_event; - cudaEventCreate(&torch_event); - cudaEventRecord(torch_event, torch_stream.stream()); - - cudaStreamWaitEvent(stream, torch_event, 0); - - cudaEventDestroy(event); - cudaEventDestroy(torch_event); - - return 0; - } - - size_t getSerializationSize() const override { - return message.SerializeAsString().size(); - } - - void serialize(void* buffer) const override { - message.SerializeToArray(buffer, getSerializationSize()); - } - - void destroy() override {} - - IPluginV2* clone() const override { - return new interpolate_Plugin(message); - } - - void setPluginNamespace(const char* pluginNamespace) override {} - - const char *getPluginNamespace() const override { - return "torch2trt"; - } - -}; - -class interpolate_PluginCreator : public IPluginCreator { -public: - interpolate_PluginCreator() {} - - const char *getPluginNamespace() const override { - return "torch2trt"; - } - - const char *getPluginName() const override { - return "interpolate"; - } - - const char *getPluginVersion() const override { - return "1"; - } - - IPluginV2 *deserializePlugin(const char *name, const void *data, size_t length) override { - interpolate_Message message; - message.ParseFromArray(data, length); - return new interpolate_Plugin(message); - } - - void setPluginNamespace(const char *N) override {} - const PluginFieldCollection *getFieldNames() override { return nullptr; } - - IPluginV2 *createPlugin(const char *name, const PluginFieldCollection *fc) override { return nullptr; } - -}; - -REGISTER_TENSORRT_PLUGIN(interpolate_PluginCreator); - -} diff --git a/torch2trt/converters/interpolate/interpolate.proto b/torch2trt/converters/interpolate/interpolate.proto deleted file mode 100644 index 34cc8585..00000000 --- a/torch2trt/converters/interpolate/interpolate.proto +++ /dev/null @@ -1,23 +0,0 @@ -syntax = "proto3"; - - -package torch2trt; - -enum DataTypeMessage { - kFloat = 0; - kHalf = 1; - kInt8 = 2; - kInt32 = 3; -} - - -message interpolate_Message { - repeated int64 size = 1; - string mode = 2; - bool align_corners = 3; - - // below params are configured by TRT and not set by user - DataTypeMessage dtype = 4; - repeated int64 input_size = 5; - repeated int64 output_size = 6; -} diff --git a/torch2trt/plugins/interpolate.cpp b/torch2trt/plugins/interpolate.cpp index 621c334a..dcb18e77 100644 --- a/torch2trt/plugins/interpolate.cpp +++ b/torch2trt/plugins/interpolate.cpp @@ -13,7 +13,7 @@ using namespace nvinfer1; namespace torch2trt { - + class InterpolatePlugin : public IPluginV2 { private: @@ 
-41,69 +41,54 @@ class InterpolatePlugin : public IPluginV2 { InterpolatePlugin(const std::string &data) { deserializeFromString(data); } - - void testPackContainer() { - packContainer(); - } - - // packs data into container for serialization - torch::jit::script::Module packContainer() const { - - torch::jit::script::Module container; - - auto i_size = torch::IValue(size); - auto i_mode = torch::IValue(mode); - auto i_align_corners = torch::IValue(align_corners); - auto i_dtype = torch::IValue((int) dtype); - auto i_input_sizes = torch::IValue(input_sizes); - auto i_output_sizes = torch::IValue(output_sizes); - - // set by user -// container.register_attribute("size", i_size.type(), i_size); -// container.register_attribute("mode", i_mode.type(), i_mode); -// container.register_attribute("align_corners", i_align_corners.type(), i_align_corners); - -// // configured by TRT -// container.register_attribute("dtype", i_dtype.type(), i_dtype); -// container.register_attribute("input_sizes", i_input_sizes.type(), i_input_sizes); -// container.register_attribute("output_sizes", i_output_sizes.type(), i_output_sizes); - } - - // unpacks data from container into class attributes - void unpackContainer(const torch::jit::script::Module& container) { - for (auto a : container.named_attributes()) { - if (a.name == "size") { - // TODO(jwelsh) toIntListRef().vec() is removed 1.5+, use toIntVector() instead... need to handle with preproc - size = a.value.toIntListRef().vec(); - } else if (a.name == "mode") { - mode = a.value.toStringRef(); - } else if (a.name == "align_corners") { - align_corners = a.value.toBool(); - } else if (a.name == "dtype") { - dtype = (DataType) a.value.toInt(); - } else if (a.name == "input_sizes") { - input_sizes = a.value.toIntListRef().vec(); - } else if (a.name == "output_sizes") { - output_sizes = a.value.toIntListRef().vec(); - } - } - } void deserializeFromString(const std::string &data) { -// char *data_ptr = data.c_str(); -// size_t size = data.size(); - torch::jit::script::Module container; std::istringstream data_stream(data); - container = torch::jit::load(data_stream); - unpackContainer(container); + torch::serialize::InputArchive input_archive; + input_archive.load_from(data_stream); + { + torch::IValue value; + input_archive.read("size", value); + size = value.toIntListRef().vec(); + } + { + torch::IValue value; + input_archive.read("mode", value); + mode = value.toStringRef(); + } + { + torch::IValue value; + input_archive.read("align_corners", value); + align_corners = value.toBool(); + } + { + torch::IValue value; + input_archive.read("dtype", value); + dtype = (DataType) value.toInt(); + } + { + torch::IValue value; + input_archive.read("input_sizes", value); + input_sizes = value.toIntListRef().vec(); + } + { + torch::IValue value; + input_archive.read("output_sizes", value); + output_sizes = value.toIntListRef().vec(); + } } std::string serializeToString() const { -// torch::jit::script::Module container = packContainer(); -// std::ostringstream data_str; -// container.save(data_str); -// return data_str.str(); - return "hello"; + torch::serialize::OutputArchive output_archive; + output_archive.write("size", torch::IValue(size)); + output_archive.write("mode", torch::IValue(mode)); + output_archive.write("align_corners", torch::IValue(align_corners)); + output_archive.write("dtype", torch::IValue((int) dtype)); + output_archive.write("input_sizes", torch::IValue(input_sizes)); + output_archive.write("output_sizes", torch::IValue(output_sizes)); + 
std::ostringstream data_str; + output_archive.save_to(data_str); + return data_str.str(); } const char* getPluginType() const override { @@ -176,16 +161,6 @@ class InterpolatePlugin : public IPluginV2 { tensor_options = tensor_options.dtype(c10::kHalf); } -// input_sizes.resize(message.input_size_size()); -// output_sizes.resize(message.output_size_size()); - -// for (int i = 0; i < message.input_size_size(); i++) { -// input_sizes[i] = message.input_size(i); -// } -// for (int i = 0; i < message.output_size_size(); i++) { -// output_sizes[i] = message.output_size(i); -// } - return 0; } @@ -291,15 +266,20 @@ class InterpolatePluginCreator : public IPluginCreator { }; + REGISTER_TENSORRT_PLUGIN(InterpolatePluginCreator); PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { py::class_(m, "InterpolatePlugin") .def(py::init, std::string, bool>(), py::arg("size"), py::arg("mode"), py::arg("align_corners")) - .def("testPackContainer", &InterpolatePlugin::testPackContainer) + .def(py::init(), py::arg("data")) .def("getSerializationSize", &InterpolatePlugin::getSerializationSize) - .def("serializeToString", &InterpolatePlugin::serializeToString); + .def("deserializeFromString", &InterpolatePlugin::deserializeFromString) + .def("serializeToString", [](const InterpolatePlugin& plugin) { + std::string data = plugin.serializeToString(); + return py::bytes(data); + }); } } // namespace torch2trt \ No newline at end of file From 62a8918f892088e76a986835c903a559ce00e81d Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 28 Apr 2020 17:10:36 -0700 Subject: [PATCH 260/355] version handling --- setup.py | 4 ++-- torch2trt/plugins/interpolate.cpp | 12 ++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 2a2ab770..05657ae5 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,4 @@ +import torch from setuptools import setup, find_packages from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CppExtension @@ -7,7 +8,6 @@ def trt_inc_dir(): def trt_lib_dir(): return "/usr/lib/aarch64-linux-gnu" - ext_modules = [ CUDAExtension( name='plugins', @@ -24,7 +24,7 @@ def trt_lib_dir(): 'nvinfer' ], extra_compile_args={ - 'cxx': [], + 'cxx': ['-DUSE_DEPRECATED_INTLIST'] if torch.__version__ < "1.5" else [], 'nvcc': [] } ) diff --git a/torch2trt/plugins/interpolate.cpp b/torch2trt/plugins/interpolate.cpp index dcb18e77..9cd0bb83 100644 --- a/torch2trt/plugins/interpolate.cpp +++ b/torch2trt/plugins/interpolate.cpp @@ -49,7 +49,11 @@ class InterpolatePlugin : public IPluginV2 { { torch::IValue value; input_archive.read("size", value); +#ifdef USE_DEPRECATED_INTLIST size = value.toIntListRef().vec(); +#else + size = value.toIntVector(); +#endif } { torch::IValue value; @@ -69,12 +73,20 @@ class InterpolatePlugin : public IPluginV2 { { torch::IValue value; input_archive.read("input_sizes", value); +#ifdef USE_DEPRECATED_INTLIST input_sizes = value.toIntListRef().vec(); +#else + input_sizes = value.toIntVector(); +#endif } { torch::IValue value; input_archive.read("output_sizes", value); +#ifdef USE_DEPRECATED_INTLIST output_sizes = value.toIntListRef().vec(); +#else + output_sizes = value.toIntVector(); +#endif } } From 1ca350b8f4fdfba4c54e40f1221199c4959afc1d Mon Sep 17 00:00:00 2001 From: FrogKuma Date: Wed, 20 May 2020 13:38:12 +0900 Subject: [PATCH 261/355] fix padding issue of 2d transposed conv. 
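PyTorch's ConvTranspose2d applies output_padding to one side only (the
bottom/right of each spatial dimension), so a single symmetric TensorRT
padding value cannot reproduce the output shape. The change below keeps the
full padding before the output and trims correspondingly less after it. A
rough sketch of the arithmetic with hypothetical example values (not the
converter code itself):

    # symmetric padding and one-sided output_padding from a PyTorch layer
    padding = (1, 1)
    output_padding = (1, 0)

    if output_padding[0] + output_padding[1] > 0:
        pre_padding = padding
        post_padding = (padding[0] - output_padding[0],
                        padding[1] - output_padding[1])
        print(pre_padding, post_padding)  # -> (1, 1) (0, 1)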
---
 torch2trt/converters/ConvTranspose2d.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/torch2trt/converters/ConvTranspose2d.py b/torch2trt/converters/ConvTranspose2d.py
index 1b9af097..e65a62c6 100644
--- a/torch2trt/converters/ConvTranspose2d.py
+++ b/torch2trt/converters/ConvTranspose2d.py
@@ -33,9 +33,16 @@ def convert_ConvTranspose2d(ctx):
         kernel=kernel,
         bias=bias)
     layer.stride = stride
+
+    # If output_padding of the original PyTorch layer is not 0, pre_padding and post_padding must be set separately; otherwise the output dimensions of PyTorch and TensorRT may differ.
+    output_padding = module.output_padding
+    if output_padding[0] + output_padding[1] > 0:
+        layer.pre_padding = padding
+        layer.post_padding = trt.tensorrt.DimsHW(padding[0] - output_padding[0], padding[1] - output_padding[1])
+    else:
     layer.padding = padding
 
     if module.groups is not None:
         layer.num_groups = module.groups
 
-    output._trt = layer.get_output(0)
\ No newline at end of file
+    output._trt = layer.get_output(0)

From fb72cac84d5bf2e206504bb2f6f9ba1fe4bed8d7 Mon Sep 17 00:00:00 2001
From: Kshitij Srivastava
Date: Thu, 14 Nov 2019 18:38:04 -0500
Subject: [PATCH 262/355] added upsample code in converters

---
 torch2trt/converters/UPSAMPLE.py | 55 ++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 torch2trt/converters/UPSAMPLE.py

diff --git a/torch2trt/converters/UPSAMPLE.py b/torch2trt/converters/UPSAMPLE.py
new file mode 100644
index 00000000..2950b9e7
--- /dev/null
+++ b/torch2trt/converters/UPSAMPLE.py
@@ -0,0 +1,55 @@
+from torch2trt.torch2trt import *
+from torch2trt.module_test import add_module_test
+
+@tensorrt_converter('torch.nn.Upsample.forward')
+def convert_upsample(ctx):
+    module = ctx.method_args[0]
+    input = ctx.method_args[1]
+    input_trt = trt_(ctx.network, input)
+    output = ctx.method_return
+
+    shape = module.size
+    if not instance(shape, tuple):
+        shape = (shape, ) * 2
+
+    scales = module.scale_factor
+    if not instance(scales, tuple):
+        scales = (scales, ) * 2
+
+    resize_mode = module.mode
+    align_corners = module.align_corners
+
+    layer = ctx.network.add_resize(
+        input=input_trt)
+
+    layer.shape = shape
+    layer.scales = scales
+    layer.resize_mode=resize_mode
+    layer.align_corners = align_corners
+
+    output._trt = layer.get_output(0)
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1,1,2,2)])
+def test_nearest_mode():
+    return torch.nn.Upsample(scale_factor=2, mode="nearest")
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1,1,2,2)])
+def test_bilinear_mode():
+    return torch.nn.Upsample(scale_factor=2, mode="bilinear")
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1,1,2,2)])
+def test_align_corner():
+    return torch.nn.Upsample(scale_factor=2, mode="bilinear", align_corners=True)
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1,1,3,3)])
+def test_bilinear_mode_odd_input_shape():
+    return torch.nn.Upsample(scale_factor=2,mode="bilinear")
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1,1,2,2)])
+def test_size_parameter():
+    return torch.nn.Upsample(size=6,mode="bilinear")
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1,1,3,3)])
+def test_size_parameter_odd_input():
+    return torch.nn.Upsample(size=6,mode="bilinear")

From f6a7eaf49ce15d8d26c0cc8645502ccf54bde125 Mon Sep 17 00:00:00 2001
From: Kshitij Srivastava
Date: Thu, 14 Nov 2019 19:00:00 -0500
Subject: [PATCH 263/355] added functional <--> nn.module binding for upsample

---
 torch2trt/converters/upsample.py |
8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 torch2trt/converters/upsample.py diff --git a/torch2trt/converters/upsample.py b/torch2trt/converters/upsample.py new file mode 100644 index 00000000..56af3f74 --- /dev/null +++ b/torch2trt/converters/upsample.py @@ -0,0 +1,8 @@ +from torch2trt.torch2trt import * +from .UPSAMPLE import * + +@tensorrt_converter('torch.nn.functional.interpolate') +def convert_interpolate(ctx): + ctx.method_args = (torch.nn.Upsample(ctx.method_args[1]), ctx.method_args[0]) + convert_upsample(ctx) + \ No newline at end of file From 3ecd4f5511ff9d477ffdb85ffbd64152794ced17 Mon Sep 17 00:00:00 2001 From: Kshitij Srivastava Date: Thu, 14 Nov 2019 19:25:26 -0500 Subject: [PATCH 264/355] made changes: --- torch2trt/converters/UPSAMPLE.py | 4 ++-- torch2trt/converters/__init__.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/torch2trt/converters/UPSAMPLE.py b/torch2trt/converters/UPSAMPLE.py index 2950b9e7..9139a30d 100644 --- a/torch2trt/converters/UPSAMPLE.py +++ b/torch2trt/converters/UPSAMPLE.py @@ -9,11 +9,11 @@ def convert_upsample(ctx): output = ctx.method_return shape = module.size - if not instance(shape, tuple): + if not isinstance(shape, tuple): shape = (shape, ) * 2 scales = module.scale_factor - if not instance(scales, tuple): + if not isinstance(scales, tuple): scales = (scales, ) * 2 resize_mode = module.mode diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index 12b53233..4446b61d 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -44,6 +44,8 @@ from .view import * from .tanh import * from .transpose import * +from .UPSAMPLE import * +from .upsample import * from .mean import * from .softmax import * from .split import * From 3c00f0965d5fc4baaf04b7bb70034994e5ff99c9 Mon Sep 17 00:00:00 2001 From: Kshitij Srivastava Date: Fri, 15 Nov 2019 12:08:46 -0500 Subject: [PATCH 265/355] working upsample with trt6 --- torch2trt/converters/UPSAMPLE.py | 47 ++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/torch2trt/converters/UPSAMPLE.py b/torch2trt/converters/UPSAMPLE.py index 9139a30d..1f624445 100644 --- a/torch2trt/converters/UPSAMPLE.py +++ b/torch2trt/converters/UPSAMPLE.py @@ -8,25 +8,35 @@ def convert_upsample(ctx): input_trt = trt_(ctx.network, input) output = ctx.method_return + layer = ctx.network.add_resize( + input=input_trt) + shape = module.size - if not isinstance(shape, tuple): - shape = (shape, ) * 2 + if shape != None: + if isinstance(shape, list): + if len(shape) == 2: + shape = (1,shape[0],shape[1]) + if len(shape) == 3: + shape = tuple(shape) + else: + shape = (1,shape,shape) + layer.shape = shape scales = module.scale_factor - if not isinstance(scales, tuple): - scales = (scales, ) * 2 + if scales != None: + if not isinstance(scales, tuple): + scales = (1,scales,scales ) + layer.scales = scales resize_mode = module.mode - align_corners = module.align_corners - - layer = ctx.network.add_resize( - input=input_trt) - - layer.shape = shape - layer.scales = scales - layer.resize_mode=resize_mode - layer.align_corners = align_corners + if resize_mode.lower() in ["linear","bilinear","trilinear"]: + layer.resize_mode = trt.ResizeMode.LINEAR + else: + layer.resize_mode=trt.ResizeMode.NEAREST + align_corners = module.align_corners + if align_corners != None: + layer.align_corners = align_corners output._trt = layer.get_output(0) @@ -36,7 +46,7 @@ def test_nearest_mode(): @add_module_test(torch.float32, 
torch.device('cuda'), [(1,1,2,2)]) def test_bilinear_mode(): - return torch.nn.Upsample(scale_factor=2, mode="bilinear") + return torch.nn.Upsample(scale_factor=2, mode="bilinear",align_corners=False) @add_module_test(torch.float32, torch.device('cuda'), [(1,1,2,2)]) def test_align_corner(): @@ -44,12 +54,13 @@ def test_align_corner(): @add_module_test(torch.float32, torch.device('cuda'), [(1,1,3,3)]) def test_bilinear_mode_odd_input_shape(): - return torch.nn.Upsample(scale_factor=2,mode="bilinear") + return torch.nn.Upsample(scale_factor=2,mode="bilinear",align_corners=False) @add_module_test(torch.float32, torch.device('cuda'), [(1,1,2,2)]) def test_size_parameter(): - return torch.nn.Upsample(size=6,mode="bilinear") - + return torch.nn.Upsample(size=3,mode="nearest") + @add_module_test(torch.float32, torch.device('cuda'), [(1,1,3,3)]) def test_size_parameter_odd_input(): - return torch.nn.Upsample(size=6,mode="bilinear") + return torch.nn.Upsample(size=6,mode="bilinear",align_corners=False) + From 558cbff2c557145fc9c441087cb1247c81ddc1be Mon Sep 17 00:00:00 2001 From: Kshitij Srivastava Date: Fri, 15 Nov 2019 18:55:36 -0500 Subject: [PATCH 266/355] added F.interpolate converter --- torch2trt/converters/UPSAMPLE.py | 66 ---------------------------- torch2trt/converters/__init__.py | 1 - torch2trt/converters/upsample.py | 74 +++++++++++++++++++++++++++++--- 3 files changed, 67 insertions(+), 74 deletions(-) delete mode 100644 torch2trt/converters/UPSAMPLE.py diff --git a/torch2trt/converters/UPSAMPLE.py b/torch2trt/converters/UPSAMPLE.py deleted file mode 100644 index 1f624445..00000000 --- a/torch2trt/converters/UPSAMPLE.py +++ /dev/null @@ -1,66 +0,0 @@ -from torch2trt.torch2trt import * -from torch2trt.module_test import add_module_test - -@tensorrt_converter('torch.nn.Upsample.forward') -def convert_upsample(ctx): - module = ctx.method_args[0] - input = ctx.method_args[1] - input_trt = trt_(ctx.network, input) - output = ctx.method_return - - layer = ctx.network.add_resize( - input=input_trt) - - shape = module.size - if shape != None: - if isinstance(shape, list): - if len(shape) == 2: - shape = (1,shape[0],shape[1]) - if len(shape) == 3: - shape = tuple(shape) - else: - shape = (1,shape,shape) - layer.shape = shape - - scales = module.scale_factor - if scales != None: - if not isinstance(scales, tuple): - scales = (1,scales,scales ) - layer.scales = scales - - resize_mode = module.mode - if resize_mode.lower() in ["linear","bilinear","trilinear"]: - layer.resize_mode = trt.ResizeMode.LINEAR - else: - layer.resize_mode=trt.ResizeMode.NEAREST - - align_corners = module.align_corners - if align_corners != None: - layer.align_corners = align_corners - output._trt = layer.get_output(0) - - -@add_module_test(torch.float32, torch.device('cuda'), [(1,1,2,2)]) -def test_nearest_mode(): - return torch.nn.Upsample(scale_factor=2, mode="nearest") - -@add_module_test(torch.float32, torch.device('cuda'), [(1,1,2,2)]) -def test_bilinear_mode(): - return torch.nn.Upsample(scale_factor=2, mode="bilinear",align_corners=False) - -@add_module_test(torch.float32, torch.device('cuda'), [(1,1,2,2)]) -def test_align_corner(): - return torch.nn.Upsample(scale_factor=2, mode="bilinear", align_corners=True) - -@add_module_test(torch.float32, torch.device('cuda'), [(1,1,3,3)]) -def test_bilinear_mode_odd_input_shape(): - return torch.nn.Upsample(scale_factor=2,mode="bilinear",align_corners=False) - -@add_module_test(torch.float32, torch.device('cuda'), [(1,1,2,2)]) -def test_size_parameter(): - return 
torch.nn.Upsample(size=3,mode="nearest") - -@add_module_test(torch.float32, torch.device('cuda'), [(1,1,3,3)]) -def test_size_parameter_odd_input(): - return torch.nn.Upsample(size=6,mode="bilinear",align_corners=False) - diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index 4446b61d..6487ce38 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -44,7 +44,6 @@ from .view import * from .tanh import * from .transpose import * -from .UPSAMPLE import * from .upsample import * from .mean import * from .softmax import * diff --git a/torch2trt/converters/upsample.py b/torch2trt/converters/upsample.py index 56af3f74..eaf8b0c6 100644 --- a/torch2trt/converters/upsample.py +++ b/torch2trt/converters/upsample.py @@ -1,8 +1,68 @@ -from torch2trt.torch2trt import * -from .UPSAMPLE import * - +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + @tensorrt_converter('torch.nn.functional.interpolate') -def convert_interpolate(ctx): - ctx.method_args = (torch.nn.Upsample(ctx.method_args[1]), ctx.method_args[0]) - convert_upsample(ctx) - \ No newline at end of file +def convert_interpolate(ctx): + #parse args + input = get_arg(ctx, 'input', pos=0, default=None) + size = get_arg(ctx, 'kernel_size', pos=1, default=None) + scale_factor=get_arg(ctx, 'scale_factor', pos=2, default=None) + mode = get_arg(ctx, 'mode', pos=3, default='nearest') + align_corners = get_arg(ctx, 'align_corners', pos=4, default=None) + + input_trt = trt_(ctx.network, input) + output = ctx.method_return + + layer = ctx.network.add_resize(input=input_trt) + + shape = size + if shape != None: + if isinstance(shape, list): + if len(shape) == 2: + shape = (1,shape[0],shape[1]) + if len(shape) == 3: + shape = tuple(shape) + else: + shape = (1,shape,shape) + layer.shape = shape + + scales = scale_factor + if scales != None: + if not isinstance(scales, tuple): + scales = (1,scales,scales ) + layer.scales = scales + + resize_mode = mode + if resize_mode.lower() in ["linear","bilinear","trilinear"]: + layer.resize_mode = trt.ResizeMode.LINEAR + else: + layer.resize_mode=trt.ResizeMode.NEAREST + + if align_corners != None: + layer.align_corners = align_corners + + output._trt = layer.get_output(0) + +@add_module_test(torch.float32, torch.device('cuda'), [(1,1,2,2)]) +def test_nearest_mode(): + return torch.nn.Upsample(scale_factor=2, mode="nearest") + +@add_module_test(torch.float32, torch.device('cuda'), [(1,1,2,2)]) +def test_bilinear_mode(): + return torch.nn.Upsample(scale_factor=2, mode="bilinear",align_corners=False) + +@add_module_test(torch.float32, torch.device('cuda'), [(1,1,2,2)]) +def test_align_corner(): + return torch.nn.Upsample(scale_factor=2, mode="bilinear", align_corners=True) + +@add_module_test(torch.float32, torch.device('cuda'), [(1,1,3,3)]) +def test_bilinear_mode_odd_input_shape(): + return torch.nn.Upsample(scale_factor=2,mode="bilinear",align_corners=False) + +@add_module_test(torch.float32, torch.device('cuda'), [(1,1,2,2)]) +def test_size_parameter(): + return torch.nn.Upsample(size=3,mode="nearest") + +@add_module_test(torch.float32, torch.device('cuda'), [(1,1,3,3)]) +def test_size_parameter_odd_input(): + return torch.nn.Upsample(size=[6,3],mode="nearest") From f48ad4bb42c1fc45d7b1d5f247a60db4e2ec6ec6 Mon Sep 17 00:00:00 2001 From: Kshitij Srivastava Date: Mon, 18 Nov 2019 08:33:22 -0500 Subject: [PATCH 267/355] (WIP), trt version check --- torch2trt/converters/__init__.py | 15 ++++++++++----- 1 file changed, 10 
insertions(+), 5 deletions(-) diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index 6487ce38..b6730a08 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -1,5 +1,5 @@ # dummy converters throw warnings method encountered - +import tensorrt as trt from .dummy_converters import * # supported converters will override dummy converters @@ -44,13 +44,18 @@ from .view import * from .tanh import * from .transpose import * -from .upsample import * from .mean import * from .softmax import * from .split import * from .chunk import * from .unary import * -# PLUGINS - -from .interpolate import * \ No newline at end of file +if int(trt.__version__.split(".")[0]) >= 6: + print("using tensorrt version {}".format(trt.__version__.split(".")[0])) + from .upsample import * +else: + print("using tensorrt version {}".format(trt.__version__.split(".")[0])) + try: + from .interpolate import * + except: + pass From 8f63b22ff452540a7f4de5d5cc34066f501bb518 Mon Sep 17 00:00:00 2001 From: Oliver Batchelor Date: Fri, 31 Jan 2020 00:12:50 +1300 Subject: [PATCH 268/355] Add 3d convolutions and interpolations --- torch2trt/converters/Conv3d.py | 68 ++++++++++++++++++++++++++++++++ torch2trt/converters/__init__.py | 1 + torch2trt/converters/upsample.py | 50 +++++++++++++++++------ 3 files changed, 106 insertions(+), 13 deletions(-) create mode 100644 torch2trt/converters/Conv3d.py diff --git a/torch2trt/converters/Conv3d.py b/torch2trt/converters/Conv3d.py new file mode 100644 index 00000000..bb4dc32e --- /dev/null +++ b/torch2trt/converters/Conv3d.py @@ -0,0 +1,68 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +@tensorrt_converter('torch.nn.Conv3d.forward') +def convert_Conv3d(ctx): + module = ctx.method_args[0] + input = ctx.method_args[1] + input_trt = trt_(ctx.network, input) + output = ctx.method_return + + kernel_size = module.kernel_size + if not isinstance(kernel_size, tuple): + kernel_size = (kernel_size, ) * 3 + + stride = module.stride + if not isinstance(stride, tuple): + stride = (stride, ) * 3 + + padding = module.padding + if not isinstance(padding, tuple): + padding = (padding, ) * 3 + + dilation = module.dilation + if not isinstance(dilation, tuple): + dilation = (dilation, ) * 3 + + kernel = trt.Weights(module.weight.detach().cpu().numpy()) + + bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype)) + if module.bias is not None: + bias = trt.Weights(module.bias.detach().cpu().numpy()) + + + layer = ctx.network.add_convolution_nd(input_trt, module.out_channels, + trt.Dims(kernel_size), kernel, bias) + layer.stride_nd = stride + layer.padding_nd = padding + layer.dilation_nd = dilation + + if module.groups is not None: + layer.num_groups = module.groups + + output._trt = layer.get_output(0) + + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 64, 64, 64)]) +def test_Conv3d_basic(): + return torch.nn.Conv3d(10, 5, kernel_size=1, stride=1, padding=0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 64, 64, 64)]) +def test_Conv3d_stride2(): + return torch.nn.Conv3d(10, 5, kernel_size=1, stride=2, padding=0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 64, 64, 64)]) +def test_Conv3d_kernel3(): + return torch.nn.Conv3d(10, 5, kernel_size=3, stride=2, padding=1) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 64, 64, 64)]) +def test_Conv3d_dilation2(): + return torch.nn.Conv3d(10, 5, kernel_size=3, stride=1, 
padding=1, dilation=2) + + + diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index b6730a08..b5a04e2c 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -18,6 +18,7 @@ from .clamp import * from .Conv1d import * from .Conv2d import * +from .Conv3d import * from .ConvTranspose2d import * from .getitem import * from .identity import * diff --git a/torch2trt/converters/upsample.py b/torch2trt/converters/upsample.py index eaf8b0c6..b95de941 100644 --- a/torch2trt/converters/upsample.py +++ b/torch2trt/converters/upsample.py @@ -10,6 +10,8 @@ def convert_interpolate(ctx): mode = get_arg(ctx, 'mode', pos=3, default='nearest') align_corners = get_arg(ctx, 'align_corners', pos=4, default=None) + input_dim = input.dim() - 2 + input_trt = trt_(ctx.network, input) output = ctx.method_return @@ -18,18 +20,15 @@ def convert_interpolate(ctx): shape = size if shape != None: if isinstance(shape, list): - if len(shape) == 2: - shape = (1,shape[0],shape[1]) - if len(shape) == 3: - shape = tuple(shape) + shape = (1,) + tuple(shape) else: - shape = (1,shape,shape) + shape = (1,) + (shape,) * input_dim layer.shape = shape scales = scale_factor if scales != None: if not isinstance(scales, tuple): - scales = (1,scales,scales ) + scales = (1,) + (scales,) * input_dim layer.scales = scales resize_mode = mode @@ -43,26 +42,51 @@ def convert_interpolate(ctx): output._trt = layer.get_output(0) -@add_module_test(torch.float32, torch.device('cuda'), [(1,1,2,2)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1,2,12,12)]) def test_nearest_mode(): return torch.nn.Upsample(scale_factor=2, mode="nearest") -@add_module_test(torch.float32, torch.device('cuda'), [(1,1,2,2)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1,4,12,12)]) def test_bilinear_mode(): - return torch.nn.Upsample(scale_factor=2, mode="bilinear",align_corners=False) + return torch.nn.Upsample(scale_factor=3, mode="bilinear",align_corners=False) -@add_module_test(torch.float32, torch.device('cuda'), [(1,1,2,2)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1,3,12,12)]) def test_align_corner(): return torch.nn.Upsample(scale_factor=2, mode="bilinear", align_corners=True) -@add_module_test(torch.float32, torch.device('cuda'), [(1,1,3,3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1,5,13,13)]) def test_bilinear_mode_odd_input_shape(): return torch.nn.Upsample(scale_factor=2,mode="bilinear",align_corners=False) -@add_module_test(torch.float32, torch.device('cuda'), [(1,1,2,2)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1,4,12,12)]) def test_size_parameter(): return torch.nn.Upsample(size=3,mode="nearest") -@add_module_test(torch.float32, torch.device('cuda'), [(1,1,3,3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1,2,13,13)]) def test_size_parameter_odd_input(): return torch.nn.Upsample(size=[6,3],mode="nearest") + + +@add_module_test(torch.float32, torch.device('cuda'), [(1,1,12,12,12)]) +def test_nearest_mode_3d(): + return torch.nn.Upsample(scale_factor=2, mode="nearest") + +@add_module_test(torch.float32, torch.device('cuda'), [(1,1,12,12,12)]) +def test_bilinear_mode_3d(): + return torch.nn.Upsample(scale_factor=3, mode="trilinear",align_corners=False) + +@add_module_test(torch.float32, torch.device('cuda'), [(1,1,12,12,12)]) +def test_align_corner_3d(): + return torch.nn.Upsample(scale_factor=4, mode="trilinear", align_corners=True) + +@add_module_test(torch.float32, torch.device('cuda'), [(1,1,13,13,13)]) 
+def test_bilinear_mode_odd_input_shape_3d(): + return torch.nn.Upsample(scale_factor=2, mode="trilinear",align_corners=False) + +@add_module_test(torch.float32, torch.device('cuda'), [(1,1,12,12,12)]) +def test_size_parameter_3d(): + return torch.nn.Upsample(size=3,mode="trilinear", align_corners=True) + +@add_module_test(torch.float32, torch.device('cuda'), [(1,1,13,13,13)]) +def test_size_parameter_odd_input_3d(): + return torch.nn.Upsample(size=[6,3,3],mode="trilinear", align_corners=False) \ No newline at end of file From 3ebf1b9a1e942499d50b5be4588406bbf08a1985 Mon Sep 17 00:00:00 2001 From: Oliver Batchelor Date: Sun, 2 Feb 2020 15:44:41 +1300 Subject: [PATCH 269/355] Update lots of ops --- torch2trt/converters/BatchNorm2d.py | 17 ------- torch2trt/converters/Conv2d.py | 68 ------------------------- torch2trt/converters/Conv3d.py | 68 ------------------------- torch2trt/converters/ConvTranspose2d.py | 41 --------------- torch2trt/converters/__init__.py | 11 ++-- torch2trt/converters/avg_pool2d.py | 54 -------------------- torch2trt/converters/cat.py | 23 ++++++--- torch2trt/converters/permute.py | 2 +- torch2trt/converters/unary.py | 1 + torch2trt/converters/upsample.py | 38 ++++++++------ torch2trt/converters/view.py | 39 ++++++++++++-- torch2trt/test.py | 9 +++- torch2trt/torch2trt.py | 8 ++- 13 files changed, 95 insertions(+), 284 deletions(-) delete mode 100644 torch2trt/converters/BatchNorm2d.py delete mode 100644 torch2trt/converters/Conv2d.py delete mode 100644 torch2trt/converters/Conv3d.py delete mode 100644 torch2trt/converters/ConvTranspose2d.py delete mode 100644 torch2trt/converters/avg_pool2d.py diff --git a/torch2trt/converters/BatchNorm2d.py b/torch2trt/converters/BatchNorm2d.py deleted file mode 100644 index 0dff8486..00000000 --- a/torch2trt/converters/BatchNorm2d.py +++ /dev/null @@ -1,17 +0,0 @@ -from torch2trt.torch2trt import * - - -@tensorrt_converter('torch.nn.BatchNorm2d.forward') -def convert_BatchNorm2d(ctx): - module = ctx.method_args[0] - input = ctx.method_args[1] - input_trt = trt_(ctx.network, input) - output = ctx.method_return - - scale = module.weight.detach().cpu().numpy() / np.sqrt(module.running_var.detach().cpu().numpy() + module.eps) - bias = module.bias.detach().cpu().numpy() - module.running_mean.detach().cpu().numpy() * scale - power = np.ones_like(scale) - - layer = ctx.network.add_scale(input_trt, trt.ScaleMode.CHANNEL, bias, scale, power) - - output._trt = layer.get_output(0) \ No newline at end of file diff --git a/torch2trt/converters/Conv2d.py b/torch2trt/converters/Conv2d.py deleted file mode 100644 index 5bd6ec85..00000000 --- a/torch2trt/converters/Conv2d.py +++ /dev/null @@ -1,68 +0,0 @@ -from torch2trt.torch2trt import * -from torch2trt.module_test import add_module_test - - -@tensorrt_converter('torch.nn.Conv2d.forward') -def convert_Conv2d(ctx): - module = ctx.method_args[0] - input = ctx.method_args[1] - input_trt = trt_(ctx.network, input) - output = ctx.method_return - - kernel_size = module.kernel_size - if not isinstance(kernel_size, tuple): - kernel_size = (kernel_size, ) * 2 - - stride = module.stride - if not isinstance(stride, tuple): - stride = (stride, ) * 2 - - padding = module.padding - if not isinstance(padding, tuple): - padding = (padding, ) * 2 - - dilation = module.dilation - if not isinstance(dilation, tuple): - dilation = (dilation, ) * 2 - - kernel = module.weight.detach().cpu().numpy() - - bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype)) - if module.bias is not None: - bias = 
module.bias.detach().cpu().numpy() - - layer = ctx.network.add_convolution( - input=input_trt, - num_output_maps=module.out_channels, - kernel_shape=kernel_size, - kernel=kernel, - bias=bias) - layer.stride = stride - layer.padding = padding - layer.dilation = dilation - - if module.groups is not None: - layer.num_groups = module.groups - - output._trt = layer.get_output(0) - - - -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)]) -def test_Conv2d_basic(): - return torch.nn.Conv2d(10, 5, kernel_size=1, stride=1, padding=0) - - -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)]) -def test_Conv2d_stride2(): - return torch.nn.Conv2d(10, 5, kernel_size=1, stride=2, padding=0) - - -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)]) -def test_Conv2d_kernel3(): - return torch.nn.Conv2d(10, 5, kernel_size=3, stride=2, padding=1) - - -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)]) -def test_Conv2d_dilation2(): - return torch.nn.Conv2d(10, 5, kernel_size=3, stride=1, padding=1, dilation=2) diff --git a/torch2trt/converters/Conv3d.py b/torch2trt/converters/Conv3d.py deleted file mode 100644 index bb4dc32e..00000000 --- a/torch2trt/converters/Conv3d.py +++ /dev/null @@ -1,68 +0,0 @@ -from torch2trt.torch2trt import * -from torch2trt.module_test import add_module_test - - -@tensorrt_converter('torch.nn.Conv3d.forward') -def convert_Conv3d(ctx): - module = ctx.method_args[0] - input = ctx.method_args[1] - input_trt = trt_(ctx.network, input) - output = ctx.method_return - - kernel_size = module.kernel_size - if not isinstance(kernel_size, tuple): - kernel_size = (kernel_size, ) * 3 - - stride = module.stride - if not isinstance(stride, tuple): - stride = (stride, ) * 3 - - padding = module.padding - if not isinstance(padding, tuple): - padding = (padding, ) * 3 - - dilation = module.dilation - if not isinstance(dilation, tuple): - dilation = (dilation, ) * 3 - - kernel = trt.Weights(module.weight.detach().cpu().numpy()) - - bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype)) - if module.bias is not None: - bias = trt.Weights(module.bias.detach().cpu().numpy()) - - - layer = ctx.network.add_convolution_nd(input_trt, module.out_channels, - trt.Dims(kernel_size), kernel, bias) - layer.stride_nd = stride - layer.padding_nd = padding - layer.dilation_nd = dilation - - if module.groups is not None: - layer.num_groups = module.groups - - output._trt = layer.get_output(0) - - - -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 64, 64, 64)]) -def test_Conv3d_basic(): - return torch.nn.Conv3d(10, 5, kernel_size=1, stride=1, padding=0) - - -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 64, 64, 64)]) -def test_Conv3d_stride2(): - return torch.nn.Conv3d(10, 5, kernel_size=1, stride=2, padding=0) - - -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 64, 64, 64)]) -def test_Conv3d_kernel3(): - return torch.nn.Conv3d(10, 5, kernel_size=3, stride=2, padding=1) - - -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 64, 64, 64)]) -def test_Conv3d_dilation2(): - return torch.nn.Conv3d(10, 5, kernel_size=3, stride=1, padding=1, dilation=2) - - - diff --git a/torch2trt/converters/ConvTranspose2d.py b/torch2trt/converters/ConvTranspose2d.py deleted file mode 100644 index 1b9af097..00000000 --- a/torch2trt/converters/ConvTranspose2d.py +++ /dev/null @@ -1,41 +0,0 @@ -from torch2trt.torch2trt import * - - -@tensorrt_converter('torch.nn.ConvTranspose2d.forward') 
-def convert_ConvTranspose2d(ctx): - module = ctx.method_args[0] - input = ctx.method_args[1] - input_trt = trt_(ctx.network, input) - output = ctx.method_return - - kernel_size = module.kernel_size - if not isinstance(kernel_size, tuple): - kernel_size = (kernel_size, ) * 2 - - stride = module.stride - if not isinstance(stride, tuple): - stride = (stride, ) * 2 - - padding = module.padding - if not isinstance(padding, tuple): - padding = (padding, ) * 2 - - kernel = module.weight.detach().cpu().numpy() - - bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype)) - if module.bias is not None: - bias = module.bias.detach().cpu().numpy() - - layer = ctx.network.add_deconvolution( - input=input_trt, - num_output_maps=module.out_channels, - kernel_shape=kernel_size, - kernel=kernel, - bias=bias) - layer.stride = stride - layer.padding = padding - - if module.groups is not None: - layer.num_groups = module.groups - - output._trt = layer.get_output(0) \ No newline at end of file diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index b5a04e2c..e63fc48b 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -9,17 +9,17 @@ from .adaptive_max_pool2d import * from .AdaptiveAvgPool2d import * from .add import * -from .avg_pool2d import * +from .avg_pool import * from .mul import * +from .compare import * from .div import * from .BatchNorm1d import * -from .BatchNorm2d import * +from .batch_norm import * from .cat import * from .clamp import * from .Conv1d import * -from .Conv2d import * -from .Conv3d import * -from .ConvTranspose2d import * +from .Conv import * +from .ConvTranspose import * from .getitem import * from .identity import * from .Identity import * @@ -40,6 +40,7 @@ from .relu6 import * from .ReLU6 import * from .sigmoid import * +from .stack import * from .sub import * from .sum import * from .view import * diff --git a/torch2trt/converters/avg_pool2d.py b/torch2trt/converters/avg_pool2d.py deleted file mode 100644 index 0b905d01..00000000 --- a/torch2trt/converters/avg_pool2d.py +++ /dev/null @@ -1,54 +0,0 @@ -from torch2trt.torch2trt import * -from torch2trt.module_test import add_module_test - - -@tensorrt_converter('torch.nn.functional.avg_pool2d') -def convert_avg_pool2d(ctx): - # parse args - input = get_arg(ctx, 'input', pos=0, default=None) - kernel_size = get_arg(ctx, 'kernel_size', pos=1, default=None) - stride = get_arg(ctx, 'stride', pos=2, default=None) - padding = get_arg(ctx, 'padding', pos=3, default=0) - ceil_mode = get_arg(ctx, 'ceil_mode', pos=4, default=False) - count_include_pad = get_arg(ctx, 'count_include_pad', pos=5, default=True) - - # get input trt tensor (or create constant if it doesn't exist) - input_trt = trt_(ctx.network, input) - - output = ctx.method_return - - # get kernel size - if not isinstance(kernel_size, tuple): - kernel_size = (kernel_size, ) * 2 - - # get stride - if not isinstance(stride, tuple): - stride = (stride, ) * 2 - - # get padding - if not isinstance(padding, tuple): - padding = (padding, ) * 2 - - layer = ctx.network.add_pooling( - input=input_trt, type=trt.PoolingType.AVERAGE, window_size=kernel_size) - - layer.stride = stride - layer.padding = padding - layer.average_count_excludes_padding = not count_include_pad - - if ceil_mode: - layer.padding_mode = trt.PaddingMode.EXPLICIT_ROUND_UP - - output._trt = layer.get_output(0) - - -@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 6)]) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 5, 7)]) 
-def test_avg_pool2d_without_ceil_mode(): - return torch.nn.AvgPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=False) - - -@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 6)]) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 5, 7)]) -def test_avg_pool2d_with_ceil_mode(): - return torch.nn.AvgPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True, count_include_pad=False) # TRT does not support ceil_mode=True && count_include_pad=True diff --git a/torch2trt/converters/cat.py b/torch2trt/converters/cat.py index 39f5b41d..109fadf3 100644 --- a/torch2trt/converters/cat.py +++ b/torch2trt/converters/cat.py @@ -1,18 +1,27 @@ from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test @tensorrt_converter('torch.cat') def convert_cat(ctx): - inputs = ctx.method_args[0] - - if 'dim' in ctx.method_kwargs: - dim = ctx.method_kwargs['dim'] - else: - dim = ctx.method_args[1] + inputs = get_arg(ctx, 'input', pos=0, default=None) + dim = get_arg(ctx, 'dim', pos=1, default=0) output = ctx.method_return trt_inputs = [trt_(ctx.network, i) for i in inputs] layer = ctx.network.add_concatenation(inputs=trt_inputs) layer.axis = dim - 1 - output._trt = layer.get_output(0) \ No newline at end of file + output._trt = layer.get_output(0) + +class Cat(torch.nn.Module): + def __init__(self, dim): + super(Cat, self).__init__() + self.dim = dim + + def forward(self, *x): + return torch.cat(x, dim=self.dim) + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 4, 4), (1, 3, 4), (1, 17, 4)]) +def test_Cat_basic(): + return Cat(1) \ No newline at end of file diff --git a/torch2trt/converters/permute.py b/torch2trt/converters/permute.py index c7baa0b8..7ef6fdf8 100644 --- a/torch2trt/converters/permute.py +++ b/torch2trt/converters/permute.py @@ -20,7 +20,7 @@ def convert_permute(ctx): layer = ctx.network.add_shuffle(input_trt) layer.second_transpose = tuple(trt_permutation) - + output._trt = layer.get_output(0) diff --git a/torch2trt/converters/unary.py b/torch2trt/converters/unary.py index 95000957..67b64b4c 100644 --- a/torch2trt/converters/unary.py +++ b/torch2trt/converters/unary.py @@ -103,6 +103,7 @@ def test_abs(): @tensorrt_converter('torch.neg') @tensorrt_converter('torch.neg_') @tensorrt_converter('torch.Tensor.neg') +@tensorrt_converter('torch.Tensor.__neg__') @tensorrt_converter('torch.Tensor.neg_') def convert_neg(ctx): __convert_unary(ctx, trt.UnaryOperation.NEG) diff --git a/torch2trt/converters/upsample.py b/torch2trt/converters/upsample.py index b95de941..1f489fc7 100644 --- a/torch2trt/converters/upsample.py +++ b/torch2trt/converters/upsample.py @@ -1,11 +1,13 @@ from torch2trt.torch2trt import * from torch2trt.module_test import add_module_test +import collections @tensorrt_converter('torch.nn.functional.interpolate') +@tensorrt_converter('torch.nn.functional.upsample') def convert_interpolate(ctx): #parse args input = get_arg(ctx, 'input', pos=0, default=None) - size = get_arg(ctx, 'kernel_size', pos=1, default=None) + size = get_arg(ctx, 'size', pos=1, default=None) scale_factor=get_arg(ctx, 'scale_factor', pos=2, default=None) mode = get_arg(ctx, 'mode', pos=3, default='nearest') align_corners = get_arg(ctx, 'align_corners', pos=4, default=None) @@ -14,22 +16,24 @@ def convert_interpolate(ctx): input_trt = trt_(ctx.network, input) output = ctx.method_return - layer = ctx.network.add_resize(input=input_trt) shape = size if shape != None: - if isinstance(shape, list): - shape = (1,) + tuple(shape) + if isinstance(shape, 
collections.Sequence): + shape = [input.size(1)] + list(shape) else: - shape = (1,) + (shape,) * input_dim + shape = [input.size(1)] + [shape] * input_dim + layer.shape = shape scales = scale_factor if scales != None: - if not isinstance(scales, tuple): - scales = (1,) + (scales,) * input_dim - layer.scales = scales + if not isinstance(scales, collections.Sequence): + scales = [scales] * input_dim + layer.scales = [1] + list(scales) + + print(input.shape, output.shape, scales, shape) resize_mode = mode if resize_mode.lower() in ["linear","bilinear","trilinear"]: @@ -62,24 +66,27 @@ def test_bilinear_mode_odd_input_shape(): def test_size_parameter(): return torch.nn.Upsample(size=3,mode="nearest") -@add_module_test(torch.float32, torch.device('cuda'), [(1,2,13,13)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1,3,13,13)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1,3,1,1)]) def test_size_parameter_odd_input(): return torch.nn.Upsample(size=[6,3],mode="nearest") -@add_module_test(torch.float32, torch.device('cuda'), [(1,1,12,12,12)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1,4,6,6,6)]) def test_nearest_mode_3d(): return torch.nn.Upsample(scale_factor=2, mode="nearest") -@add_module_test(torch.float32, torch.device('cuda'), [(1,1,12,12,12)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1,3,5,5,5)]) def test_bilinear_mode_3d(): return torch.nn.Upsample(scale_factor=3, mode="trilinear",align_corners=False) -@add_module_test(torch.float32, torch.device('cuda'), [(1,1,12,12,12)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1,4,8,8,8)]) def test_align_corner_3d(): return torch.nn.Upsample(scale_factor=4, mode="trilinear", align_corners=True) -@add_module_test(torch.float32, torch.device('cuda'), [(1,1,13,13,13)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1,6,7,7,7)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1,3,2,4,4)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1,3,1,1,1)]) def test_bilinear_mode_odd_input_shape_3d(): return torch.nn.Upsample(scale_factor=2, mode="trilinear",align_corners=False) @@ -87,6 +94,7 @@ def test_bilinear_mode_odd_input_shape_3d(): def test_size_parameter_3d(): return torch.nn.Upsample(size=3,mode="trilinear", align_corners=True) -@add_module_test(torch.float32, torch.device('cuda'), [(1,1,13,13,13)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1,3,7,9,5)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1,4,3,5,1)]) def test_size_parameter_odd_input_3d(): - return torch.nn.Upsample(size=[6,3,3],mode="trilinear", align_corners=False) \ No newline at end of file + return torch.nn.Upsample(size=[11,14,17],mode="trilinear", align_corners=False) \ No newline at end of file diff --git a/torch2trt/converters/view.py b/torch2trt/converters/view.py index 0325a83e..5d674f8b 100644 --- a/torch2trt/converters/view.py +++ b/torch2trt/converters/view.py @@ -5,6 +5,10 @@ @tensorrt_converter('torch.flatten') @tensorrt_converter('torch.Tensor.reshape') @tensorrt_converter('torch.Tensor.view') +@tensorrt_converter('torch.Tensor.squeeze') +@tensorrt_converter('torch.Tensor.unsqueeze') +@tensorrt_converter('torch.squeeze') +@tensorrt_converter('torch.unsqueeze') def convert_view(ctx): input = ctx.method_args[0] input_trt = trt_(ctx.network, input) @@ -23,6 +27,22 @@ def forward(self, x): return x.view(*self.dims) +class Squeeze(torch.nn.Module): + def __init__(self, dim): + super(Squeeze, self).__init__() + self.dim = dim + + def forward(self, x): + 
return x.squeeze(dim=self.dim)
+
+class UnSqueeze(torch.nn.Module):
+    def __init__(self, dim):
+        super(UnSqueeze, self).__init__()
+        self.dim = dim
+
+    def forward(self, x):
+        return x.unsqueeze(dim=self.dim)
+
 @add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
 @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
 @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
@@ -38,8 +58,19 @@ def test_view_2d():
     return View(1, 1, -1)
 
 
-@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
-@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
-@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3, 6)])
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3, 3, 6)])
 def test_view_3d():
-    return View(1, 1, 1, -1)
+    return View(1, 3, 3, -1)
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 7)])
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 5, 3)])
+def test_unsqueeze():
+    return UnSqueeze(2)
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 1)])
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 1, 3)])
+def test_squeeze():
+    return Squeeze(2)
+
+
diff --git a/torch2trt/test.py b/torch2trt/test.py
index 464ea31d..5578b882 100644
--- a/torch2trt/test.py
+++ b/torch2trt/test.py
@@ -20,7 +20,7 @@ def run(self):
             inputs_conversion += (torch.zeros(shape).to(self.device).type(self.dtype), )
 
         # convert module
-        module_trt = torch2trt(module, inputs_conversion, **self.torch2trt_kwargs)
+        module_trt = torch2trt(module, inputs_conversion, max_workspace_size=1 << 20, **self.torch2trt_kwargs)
 
         # create inputs for torch/trt.. copy of inputs to handle inplace ops
         inputs = ()
@@ -39,7 +39,12 @@ def run(self):
 
         # compute max error
         max_error = 0
         for i in range(len(outputs)):
-            max_error_i = torch.max(torch.abs(outputs[i] - outputs_trt[i]))
+            max_error_i = 0
+            if outputs[i].dtype == torch.bool:
+                max_error_i = torch.sum(outputs[i] ^ outputs_trt[i])
+            else:
+                max_error_i = torch.max(torch.abs(outputs[i] - outputs_trt[i]))
+
             if max_error_i > max_error:
                 max_error = max_error_i
 
diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py
index bd860ab0..c75be1b2 100644
--- a/torch2trt/torch2trt.py
+++ b/torch2trt/torch2trt.py
@@ -9,6 +9,8 @@ def torch_dtype_to_trt(dtype):
+    if dtype == torch.bool:
+        return trt.bool
     if dtype == torch.int8:
         return trt.int8
     elif dtype == torch.int32:
@@ -24,6 +26,8 @@ def torch_dtype_from_trt(dtype):
     if dtype == trt.int8:
         return torch.int8
+    if dtype == trt.bool:
+        return torch.bool
     elif dtype == trt.int32:
         return torch.int32
     elif dtype == trt.float16:
@@ -223,12 +227,12 @@ def __init__(self, ctx, method, converter):
     def _set_method(self, method):
         exec('%s = method' % self.method_str)
 
-    def __enter__(self):
+    def __enter__(self):
         try:
             self.method_impl = eval(self.method_str)
         except AttributeError:
             self.method_impl = None
-
+
         if self.method_impl:
             self._set_method(attach_converter(self.ctx, self.method_impl, self.converter, self.method_str))

From 63e65d4496865003f682bba862c8b3b5b19cb448 Mon Sep 17 00:00:00 2001
From: Oliver Batchelor
Date: Sun, 2 Feb 2020 22:56:33 +1300
Subject: [PATCH 270/355] More operations

---
 torch2trt/converters/Conv.py          | 91 +++++++++++++++++++++++++++
 torch2trt/converters/ConvTranspose.py | 79 +++++++++++++++++++++++
 torch2trt/converters/avg_pool.py      | 68 ++++++++++++++++++++
 torch2trt/converters/batch_norm.py    | 40 ++++++++++++
 torch2trt/converters/compare.py       | 59 +++++++++++++++++
torch2trt/converters/mul.py | 2 +- torch2trt/converters/stack.py | 40 ++++++++++++ torch2trt/converters/sum.py | 18 +++++- torch2trt/converters/upsample.py | 2 - 9 files changed, 394 insertions(+), 5 deletions(-) create mode 100644 torch2trt/converters/Conv.py create mode 100644 torch2trt/converters/ConvTranspose.py create mode 100644 torch2trt/converters/avg_pool.py create mode 100644 torch2trt/converters/batch_norm.py create mode 100644 torch2trt/converters/compare.py create mode 100644 torch2trt/converters/stack.py diff --git a/torch2trt/converters/Conv.py b/torch2trt/converters/Conv.py new file mode 100644 index 00000000..204d002e --- /dev/null +++ b/torch2trt/converters/Conv.py @@ -0,0 +1,91 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +@tensorrt_converter('torch.nn.Conv2d.forward') +@tensorrt_converter('torch.nn.Conv3d.forward') +def convert_Conv2d(ctx): + module = ctx.method_args[0] + input = ctx.method_args[1] + input_trt = trt_(ctx.network, input) + output = ctx.method_return + + input_dim = input.dim() - 2 + + kernel_size = module.kernel_size + if not isinstance(kernel_size, tuple): + kernel_size = (kernel_size, ) * input_dim + + stride = module.stride + if not isinstance(stride, tuple): + stride = (stride, ) * input_dim + + padding = module.padding + if not isinstance(padding, tuple): + padding = (padding, ) * input_dim + + dilation = module.dilation + if not isinstance(dilation, tuple): + dilation = (dilation, ) * input_dim + + kernel = module.weight.detach().cpu().numpy() + + bias = None #trt.Weights(torch_dtype_to_trt(module.weight.dtype)) + if module.bias is not None: + bias = module.bias.detach().cpu().numpy() + + layer = ctx.network.add_convolution_nd( + input=input_trt, + num_output_maps=module.out_channels, + kernel_shape=kernel_size, + kernel=kernel, + bias=bias) + layer.stride_nd = stride + layer.padding_nd = padding + layer.dilation_nd = dilation + + if module.groups is not None: + layer.num_groups = module.groups + + output._trt = layer.get_output(0) + + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)]) +def test_Conv2d_basic(): + return torch.nn.Conv2d(10, 5, kernel_size=1, stride=1, padding=0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)]) +def test_Conv2d_stride2(): + return torch.nn.Conv2d(10, 5, kernel_size=1, stride=2, padding=0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)]) +def test_Conv2d_kernel3(): + return torch.nn.Conv2d(10, 5, kernel_size=3, stride=2, padding=1) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)]) +def test_Conv2d_dilation2(): + return torch.nn.Conv2d(10, 5, kernel_size=3, stride=1, padding=1, dilation=2) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 64, 64, 64)]) +def test_Conv3d_basic(): + return torch.nn.Conv3d(10, 5, kernel_size=1, stride=1, padding=0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 64, 64, 64)]) +def test_Conv3d_stride2(): + return torch.nn.Conv3d(10, 5, kernel_size=1, stride=2, padding=0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 64, 64, 64)]) +def test_Conv3d_kernel3(): + return torch.nn.Conv3d(10, 5, kernel_size=3, stride=2, padding=1) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 64, 64, 64)]) +def test_Conv3d_dilation2(): + return torch.nn.Conv3d(10, 5, kernel_size=3, stride=1, padding=1, dilation=2) \ No newline at end of file diff --git 
a/torch2trt/converters/ConvTranspose.py b/torch2trt/converters/ConvTranspose.py new file mode 100644 index 00000000..dbceb43b --- /dev/null +++ b/torch2trt/converters/ConvTranspose.py @@ -0,0 +1,79 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +@tensorrt_converter('torch.nn.ConvTranspose2d.forward') +@tensorrt_converter('torch.nn.ConvTranspose3d.forward') +def convert_ConvTranspose2d(ctx): + module = ctx.method_args[0] + input = ctx.method_args[1] + input_trt = trt_(ctx.network, input) + output = ctx.method_return + + input_dim = input.dim() - 2 + + kernel_size = module.kernel_size + if not isinstance(kernel_size, tuple): + kernel_size = (kernel_size, ) * input_dim + + stride = module.stride + if not isinstance(stride, tuple): + stride = (stride, ) * input_dim + + padding = module.padding + if not isinstance(padding, tuple): + padding = (padding, ) * input_dim + + assert module.dilation == 1 or all([d == 1 for d in module.dilation]), \ + "Transposed convolution dilation is not supported in TensorRT" + + kernel = module.weight.detach().cpu().numpy() + + bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype)) + if module.bias is not None: + bias = module.bias.detach().cpu().numpy() + + layer = ctx.network.add_deconvolution_nd( + input=input_trt, + num_output_maps=module.out_channels, + kernel_shape=kernel_size, + kernel=kernel, + bias=bias) + layer.stride_nd = stride + layer.padding_nd = padding + + if module.groups is not None: + layer.num_groups = module.groups + + output._trt = layer.get_output(0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 7, 7)]) +def test_ConvTranspose2d_basic(): + return torch.nn.ConvTranspose2d(10, 5, kernel_size=1, stride=1, padding=0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 8, 8)]) +def test_ConvTranspose2d_stride2(): + return torch.nn.ConvTranspose2d(10, 5, kernel_size=1, stride=2, padding=0) + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 9, 9)]) +def test_ConvTranspose2d_kernel3(): + return torch.nn.ConvTranspose2d(10, 5, kernel_size=3, stride=2, padding=1) + + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 7, 7, 7)]) +def test_ConvTranspose3d_basic(): + return torch.nn.ConvTranspose3d(10, 5, kernel_size=1, stride=1, padding=0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 7, 7, 7)]) +def test_ConvTranspose3d_stride2(): + return torch.nn.ConvTranspose3d(10, 5, kernel_size=1, stride=2, padding=0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 6, 6, 6)]) +def test_ConvTranspose3d_kernel3(): + return torch.nn.ConvTranspose3d(10, 5, kernel_size=3, stride=2, padding=1) + diff --git a/torch2trt/converters/avg_pool.py b/torch2trt/converters/avg_pool.py new file mode 100644 index 00000000..1db9133d --- /dev/null +++ b/torch2trt/converters/avg_pool.py @@ -0,0 +1,68 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +@tensorrt_converter('torch.nn.functional.avg_pool2d') +@tensorrt_converter('torch.nn.functional.avg_pool3d') +def convert_avg_pool(ctx): + # parse args + input = get_arg(ctx, 'input', pos=0, default=None) + kernel_size = get_arg(ctx, 'kernel_size', pos=1, default=None) + stride = get_arg(ctx, 'stride', pos=2, default=None) + padding = get_arg(ctx, 'padding', pos=3, default=0) + ceil_mode = get_arg(ctx, 'ceil_mode', pos=4, default=False) + count_include_pad = get_arg(ctx, 'count_include_pad', pos=5, default=True) + + # get input trt tensor 
(or create constant if it doesn't exist) + input_trt = trt_(ctx.network, input) + output = ctx.method_return + + input_dim = input.dim() - 2 + + # get kernel size + if not isinstance(kernel_size, tuple): + kernel_size = (kernel_size, ) * input_dim + + # get stride + if not isinstance(stride, tuple): + stride = (stride, ) * input_dim + + # get padding + if not isinstance(padding, tuple): + padding = (padding, ) * input_dim + + layer = ctx.network.add_pooling_nd( + input=input_trt, type=trt.PoolingType.AVERAGE, window_size=kernel_size) + + layer.stride_nd = stride + layer.padding_nd = padding + layer.average_count_excludes_padding = not count_include_pad + + if ceil_mode: + layer.padding_mode = trt.PaddingMode.EXPLICIT_ROUND_UP + + output._trt = layer.get_output(0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 6)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 5, 7)]) +def test_avg_pool2d_without_ceil_mode(): + return torch.nn.AvgPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=False) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 6)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 5, 7)]) +def test_avg_pool2d_with_ceil_mode(): + return torch.nn.AvgPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True, count_include_pad=False) # TRT does not support ceil_mode=True && count_include_pad=True + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 4, 6)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 5, 7)]) +def test_avg_pool3d_without_ceil_mode(): + return torch.nn.AvgPool3d(kernel_size=3, stride=2, padding=1, ceil_mode=False) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 4, 6)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 5, 7)]) +def test_avg_pool3d_with_ceil_mode(): + return torch.nn.AvgPool3d(kernel_size=3, stride=2, padding=1, ceil_mode=True, count_include_pad=False) # TRT does not support ceil_mode=True && count_include_pad=True diff --git a/torch2trt/converters/batch_norm.py b/torch2trt/converters/batch_norm.py new file mode 100644 index 00000000..5af8ac6c --- /dev/null +++ b/torch2trt/converters/batch_norm.py @@ -0,0 +1,40 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + +@tensorrt_converter('torch.nn.functional.batch_norm') +def convert_batch_norm(ctx): + + input = get_arg(ctx, 'input', pos=0, default=None) + running_mean = get_arg(ctx, 'running_mean', pos=1, default=None) + running_var = get_arg(ctx, 'running_var', pos=2, default=None) + + weight = get_arg(ctx, 'weight', pos=3, default=None) + bias = get_arg(ctx, 'bias', pos=4, default=None) + eps = get_arg(ctx, 'eps', pos=7, default=10e-6) + + input_trt = trt_(ctx.network, input) + output = ctx.method_return + + scale = weight.detach().cpu().numpy() / np.sqrt(running_var.detach().cpu().numpy() + eps) + bias = bias.detach().cpu().numpy() - running_mean.detach().cpu().numpy() * scale + power = np.ones_like(scale) + + layer = ctx.network.add_scale_nd(input_trt, trt.ScaleMode.CHANNEL, bias, scale, power, 0) + output._trt = layer.get_output(0) + + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3)]) +def test_batch_norm_2d(): + return torch.nn.BatchNorm2d(10) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3, 3)]) +def test_batch_norm_3d_2(): + return torch.nn.BatchNorm3d(10) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 32, 2, 36, 47)]) +def test_batch_norm_3d(): + 
return torch.nn.BatchNorm3d(32)
+
\ No newline at end of file
diff --git a/torch2trt/converters/compare.py b/torch2trt/converters/compare.py
new file mode 100644
index 00000000..f57b9a42
--- /dev/null
+++ b/torch2trt/converters/compare.py
@@ -0,0 +1,59 @@
+from torch2trt.torch2trt import *
+from torch2trt.module_test import add_module_test
+
+def convert_elementwise(ctx, op):
+    input_a = ctx.method_args[0]
+    input_b = ctx.method_args[1]
+    input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b)
+    output = ctx.method_return
+    layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, op)
+    output._trt = layer.get_output(0)
+
+@tensorrt_converter('torch.gt')
+@tensorrt_converter('torch.Tensor.__gt__')
+def convert_gt(ctx):
+    return convert_elementwise(ctx, trt.ElementWiseOperation.GREATER)
+
+@tensorrt_converter('torch.lt')
+@tensorrt_converter('torch.Tensor.__lt__')
+def convert_lt(ctx):
+    return convert_elementwise(ctx, trt.ElementWiseOperation.LESS)
+
+@tensorrt_converter('torch.eq')
+@tensorrt_converter('torch.Tensor.__eq__')
+def convert_eq(ctx):
+    return convert_elementwise(ctx, trt.ElementWiseOperation.EQUAL)
+
+class GT(torch.nn.Module):
+    def __init__(self):
+        super(GT, self).__init__()
+
+    def forward(self, x, y):
+        return x > y
+
+class LT(torch.nn.Module):
+    def __init__(self):
+        super(LT, self).__init__()
+
+    def forward(self, x, y):
+        return x < y
+
+class EQ(torch.nn.Module):
+    def __init__(self):
+        super(EQ, self).__init__()
+
+    def forward(self, x, y):
+        return x == y
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 6, 6), (1, 3, 6, 6)])
+def test_gt_basic():
+    return GT()
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 6, 6), (1, 3, 6, 6)])
+def test_lt_basic():
+    return LT()
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 6, 6), (1, 3, 6, 6)])
+def test_eq_basic():
+    return EQ()
diff --git a/torch2trt/converters/mul.py b/torch2trt/converters/mul.py
index e3b2f991..9959e4b7 100644
--- a/torch2trt/converters/mul.py
+++ b/torch2trt/converters/mul.py
@@ -1,6 +1,7 @@
 from torch2trt.torch2trt import *
 from torch2trt.module_test import add_module_test
 
+from tools import shape
 
 @tensorrt_converter('torch.mul')
 @tensorrt_converter('torch.Tensor.__imul__')
@@ -14,7 +15,6 @@ def convert_mul(ctx):
     layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.PROD)
     output._trt = layer.get_output(0)
 
-
 class Mul(torch.nn.Module):
     def __init__(self):
         super(Mul, self).__init__()
diff --git a/torch2trt/converters/stack.py b/torch2trt/converters/stack.py
new file mode 100644
index 00000000..0b5b4def
--- /dev/null
+++ b/torch2trt/converters/stack.py
@@ -0,0 +1,40 @@
+from torch2trt.torch2trt import *
+from torch2trt.module_test import add_module_test
+
+
+def unsqueeze(ctx, input, dim):
+    layer = ctx.network.add_shuffle(trt_(ctx.network, input))
+
+    shape = input.shape[1:dim] + (1,) + input.shape[dim:]
+    layer.reshape_dims = tuple(shape)
+
+    return layer.get_output(0)
+
+
+@tensorrt_converter('torch.stack')
+def convert_stack(ctx):
+    inputs = get_arg(ctx, 'input', pos=0, default=None)
+    dim = get_arg(ctx, 'dim', pos=1, default=0)
+
+    output = ctx.method_return
+    trt_inputs = [unsqueeze(ctx, i, dim) for i in inputs]
+
+    layer = ctx.network.add_concatenation(inputs=trt_inputs)
+    layer.axis = dim - 1
+    output._trt = layer.get_output(0)
+
+class Stack(torch.nn.Module):
+    def __init__(self, dim):
+        super(Stack, self).__init__()
+        self.dim = dim
+
+    def forward(self, *x):
+        return torch.stack(x, dim=self.dim)
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 4, 4), (1, 4, 4), (1, 4, 4)]) +def test_Stack_basic(): + return Stack(3) + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 4, 4), (1, 4, 4), (1, 4, 4)]) +def test_Stack_basic2(): + return Stack(1) \ No newline at end of file diff --git a/torch2trt/converters/sum.py b/torch2trt/converters/sum.py index e3873e76..52f21eb0 100644 --- a/torch2trt/converters/sum.py +++ b/torch2trt/converters/sum.py @@ -1,7 +1,7 @@ from torch2trt.torch2trt import * from torch2trt.module_test import add_module_test from .unary import UnaryModule - +from torch import nn @tensorrt_converter('torch.sum') @tensorrt_converter('torch.Tensor.sum') @@ -35,4 +35,18 @@ def test_sum_reduce_dim22(): @add_module_test(torch.float32, torch.device('cuda'), [(1, 3)]) @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) def test_sum_reduce_dim1_keepdim(): - return UnaryModule(lambda x: torch.sum(x, 1, keepdim=True)) \ No newline at end of file + return UnaryModule(lambda x: torch.sum(x, 1, keepdim=True)) + + +class DisparityRegression(nn.Module): + def __init__(self, maxdisp): + super(DisparityRegression, self).__init__() + self.register_buffer('disp', torch.arange(maxdisp, dtype=torch.float32).view(maxdisp, 1, 1)) + + def forward(self, x): + return torch.sum(x * self.disp, 1) + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 23, 23)]) +def test_disparity_reg(): + return DisparityRegression(10) \ No newline at end of file diff --git a/torch2trt/converters/upsample.py b/torch2trt/converters/upsample.py index 1f489fc7..362a1d84 100644 --- a/torch2trt/converters/upsample.py +++ b/torch2trt/converters/upsample.py @@ -33,8 +33,6 @@ def convert_interpolate(ctx): scales = [scales] * input_dim layer.scales = [1] + list(scales) - print(input.shape, output.shape, scales, shape) - resize_mode = mode if resize_mode.lower() in ["linear","bilinear","trilinear"]: layer.resize_mode = trt.ResizeMode.LINEAR From a4f0b5257435a4bc0150ccd386f2134ba02af0b1 Mon Sep 17 00:00:00 2001 From: Oliver Batchelor Date: Sun, 16 Feb 2020 13:35:49 +1300 Subject: [PATCH 271/355] Remove tools import --- torch2trt/converters/mul.py | 1 - 1 file changed, 1 deletion(-) diff --git a/torch2trt/converters/mul.py b/torch2trt/converters/mul.py index 9959e4b7..d5f3bb31 100644 --- a/torch2trt/converters/mul.py +++ b/torch2trt/converters/mul.py @@ -1,7 +1,6 @@ from torch2trt.torch2trt import * from torch2trt.module_test import add_module_test -from tools import shape @tensorrt_converter('torch.mul') @tensorrt_converter('torch.Tensor.__imul__') From 393a13dd6e3c904915bcc1efc0e29dbae617b310 Mon Sep 17 00:00:00 2001 From: Kshitij Srivastava Date: Mon, 25 May 2020 17:36:19 -0400 Subject: [PATCH 272/355] fixed trt version compatibility for torch2trt.py --- torch2trt/torch2trt.py | 238 +++++++++++++++++++++++------------------ torch2trt/utils.py | 39 ++++--- 2 files changed, 155 insertions(+), 122 deletions(-) diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index c75be1b2..feda7331 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -2,14 +2,19 @@ import tensorrt as trt from copy import copy import numpy as np -from .calibration import TensorBatchDataset, DatasetCalibrator, DEFAULT_CALIBRATION_ALGORITHM - - +from torch2trt.utils import get_trt_version +from .calibration import ( + TensorBatchDataset, + DatasetCalibrator, + DEFAULT_CALIBRATION_ALGORITHM, +) + +trt_version = 
get_trt_version() # UTILITY FUNCTIONS def torch_dtype_to_trt(dtype): - if dtype == torch.bool: + if trt_version >= 7.1 and dtype == torch.bool: return trt.bool if dtype == torch.int8: return trt.int8 @@ -20,14 +25,14 @@ def torch_dtype_to_trt(dtype): elif dtype == torch.float32: return trt.float32 else: - raise TypeError('%s is not supported by tensorrt' % dtype) + raise TypeError("%s is not supported by tensorrt" % dtype) def torch_dtype_from_trt(dtype): if dtype == trt.int8: return torch.int8 - if dtype == trt.bool: - return torch.bool + if trt_version >= 7.1 and dtype == trt.bool: + return torch.bool elif dtype == trt.int32: return torch.int32 elif dtype == trt.float16: @@ -35,26 +40,26 @@ def torch_dtype_from_trt(dtype): elif dtype == trt.float32: return torch.float32 else: - raise TypeError('%s is not supported by torch' % dtype) + raise TypeError("%s is not supported by torch" % dtype) def torch_device_to_trt(device): - if device.type == torch.device('cuda').type: + if device.type == torch.device("cuda").type: return trt.TensorLocation.DEVICE - elif device.type == torch.device('cpu').type: + elif device.type == torch.device("cpu").type: return trt.TensorLocation.HOST else: - return TypeError('%s is not supported by tensorrt' % device) + return TypeError("%s is not supported by tensorrt" % device) def torch_device_from_trt(device): if device == trt.TensorLocation.DEVICE: - return torch.device('cuda') + return torch.device("cuda") elif device == trt.TensorLocation.HOST: - return torch.device('cpu') + return torch.device("cpu") else: - return TypeError('%s is not supported by torch' % device) - + return TypeError("%s is not supported by torch" % device) + def trt_num_inputs(engine): count = 0 @@ -62,7 +67,7 @@ def trt_num_inputs(engine): if engine.binding_is_input(i): count += 1 return count - + def trt_num_outputs(engine): count = 0 @@ -75,16 +80,16 @@ def trt_num_outputs(engine): def torch_dim_to_trt_axes(dim): """Converts torch dim, or tuple of dims to a tensorrt axes bitmask""" if not isinstance(dim, tuple): - dim = (dim, ) - + dim = (dim,) + # create axes bitmask for reduce layer axes = 0 for d in dim: - axes |= 1 << (d - 1) # -1 to remove batch dimension - + axes |= 1 << (d - 1) # -1 to remove batch dimension + return axes - - + + def add_trt_constant(network, tensor): shape = tuple(tensor.shape[1:]) array = tensor[0].detach().cpu().numpy() @@ -99,56 +104,59 @@ def check_torch_dtype(*tensors): if dtype is None: dtype = t.dtype else: - assert(dtype == t.dtype)#, 'Tensor data types must match') - assert(dtype is not None)#, 'Data type could not be inferred from any item in list') + assert dtype == t.dtype # , 'Tensor data types must match') + assert ( + dtype is not None + ) # , 'Data type could not be inferred from any item in list') return dtype - + def trt_(network, *tensors): """Creates missing TensorRT tensors and adds shuffle layers to make tensors broadcastable""" trt_tensors = [None] * len(tensors) - + dtype = check_torch_dtype(*tensors) - + # get broadcast dimension broadcast_num_dim = 0 for t in tensors: if isinstance(t, torch.Tensor): - if not hasattr(t, '_trt'): - num_dim = len(t.shape) # don't exclude batch for constants + if not hasattr(t, "_trt"): + num_dim = len(t.shape) # don't exclude batch for constants else: - num_dim = len(t._trt.shape) # non-leaf tensors must already have _trt, get shape from that + num_dim = len( + t._trt.shape + ) # non-leaf tensors must already have _trt, get shape from that if num_dim > broadcast_num_dim: broadcast_num_dim = num_dim - - + 
for i, t in enumerate(tensors): trt_tensor = None - + # GET TRT TENSOR (OR CREATE TRT CONSTANT) - + # get tensor w/ _trt - if isinstance(t, torch.Tensor) and hasattr(t, '_trt'): + if isinstance(t, torch.Tensor) and hasattr(t, "_trt"): trt_tensor = t._trt - + # or... add constant for leaf tensor w/o _trt - elif isinstance(t, torch.Tensor) and not hasattr(t, '_trt'): + elif isinstance(t, torch.Tensor) and not hasattr(t, "_trt"): # add leaf tensor - shape = tuple(t.shape) # don't exclude batch when adding constants...? + shape = tuple(t.shape) # don't exclude batch when adding constants...? weight = t.detach().cpu().numpy() t._trt = network.add_constant(shape, weight).get_output(0) trt_tensor = t._trt - + # or... add constant for scalar primitive elif isinstance(t, float) or isinstance(t, int): shape = (1,) * broadcast_num_dim scalar = t * torch.ones(shape, dtype=dtype).cpu().numpy() trt_tensor = network.add_constant(shape, scalar).get_output(0) - - assert(trt_tensor is not None) - + + assert trt_tensor is not None + # MAKE TRT TENSOR BROADCASTABLE IF IT IS NOT ALREADY - + if len(trt_tensor.shape) < broadcast_num_dim: # append 1 size dims to front diff = broadcast_num_dim - len(trt_tensor.shape) @@ -156,21 +164,21 @@ def trt_(network, *tensors): layer = network.add_shuffle(trt_tensor) layer.reshape_dims = shape trt_tensor = layer.get_output(0) - + trt_tensors[i] = trt_tensor - + if len(trt_tensors) == 1: return trt_tensors[0] else: return tuple(trt_tensors) - + # CONVERSION REGISTRY AND HOOKS CONVERTERS = {} - - + + def get_arg(ctx, name, pos, default): if name in ctx.method_kwargs: return ctx.method_kwargs[name] @@ -178,32 +186,32 @@ def get_arg(ctx, name, pos, default): return ctx.method_args[pos] else: return default - + def attach_converter(ctx, method, converter, method_str): """Gets a function that executes PyTorch method and TensorRT converter""" global DUMMY_CONVERTERS - + def wrapper(*args, **kwargs): skip = True - + # check if another (parent) converter has lock if not ctx.lock: - if converter['is_real']: + if converter["is_real"]: ctx.lock = True # only real converters can acquire lock skip = False # run original method outputs = method(*args, **kwargs) - + if not skip: ctx.method_args = args ctx.method_kwargs = kwargs ctx.method_return = outputs ctx.method_str = method_str - -# print('%s' % (converter.__name__,)) - converter['converter'](ctx) + + # print('%s' % (converter.__name__,)) + converter["converter"](ctx) # convert to None so conversion will fail for unsupported layers ctx.method_args = None @@ -225,16 +233,20 @@ def __init__(self, ctx, method, converter): self.converter = converter def _set_method(self, method): - exec('%s = method' % self.method_str) + exec("%s = method" % self.method_str) - def __enter__(self): + def __enter__(self): try: self.method_impl = eval(self.method_str) except AttributeError: self.method_impl = None - + if self.method_impl: - self._set_method(attach_converter(self.ctx, self.method_impl, self.converter, self.method_str)) + self._set_method( + attach_converter( + self.ctx, self.method_impl, self.converter, self.method_str + ) + ) def __exit__(self, type, val, tb): if self.method_impl: @@ -264,11 +276,11 @@ def __exit__(self, type, val, tb): def add_inputs(self, torch_inputs, names=None): if names is None: - names = ['input_%d' % i for i in range(len(torch_inputs))] + names = ["input_%d" % i for i in range(len(torch_inputs))] self.input_names = names for i, torch_input in enumerate(torch_inputs): - if not hasattr(torch_input, '_trt'): + if not 
hasattr(torch_input, "_trt"): trt_tensor = self.network.add_input( name=names[i], shape=tuple(torch_input.shape)[1:], @@ -279,7 +291,7 @@ def add_inputs(self, torch_inputs, names=None): def mark_outputs(self, torch_outputs, names=None): if names is None: - names = ['output_%d' % i for i in range(len(torch_outputs))] + names = ["output_%d" % i for i in range(len(torch_outputs))] self.output_names = names for i, torch_output in enumerate(torch_outputs): @@ -299,22 +311,31 @@ def __init__(self, engine=None, input_names=None, output_names=None): self.context = self.engine.create_execution_context() self.input_names = input_names self.output_names = output_names - + def _on_state_dict(self, state_dict, prefix, local_metadata): - state_dict[prefix + 'engine'] = bytearray(self.engine.serialize()) - state_dict[prefix + 'input_names'] = self.input_names - state_dict[prefix + 'output_names'] = self.output_names - - def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs): - engine_bytes = state_dict[prefix + 'engine'] - + state_dict[prefix + "engine"] = bytearray(self.engine.serialize()) + state_dict[prefix + "input_names"] = self.input_names + state_dict[prefix + "output_names"] = self.output_names + + def _load_from_state_dict( + self, + state_dict, + prefix, + local_metadata, + strict, + missing_keys, + unexpected_keys, + error_msgs, + ): + engine_bytes = state_dict[prefix + "engine"] + with trt.Logger() as logger, trt.Runtime(logger) as runtime: self.engine = runtime.deserialize_cuda_engine(engine_bytes) self.context = self.engine.create_execution_context() - - self.input_names = state_dict[prefix + 'input_names'] - self.output_names = state_dict[prefix + 'output_names'] - + + self.input_names = state_dict[prefix + "input_names"] + self.output_names = state_dict[prefix + "output_names"] + def forward(self, *inputs): batch_size = inputs[0].shape[0] bindings = [None] * (len(self.input_names) + len(self.output_names)) @@ -324,7 +345,7 @@ def forward(self, *inputs): for i, output_name in enumerate(self.output_names): idx = self.engine.get_binding_index(output_name) dtype = torch_dtype_from_trt(self.engine.get_binding_dtype(idx)) - shape = (batch_size, ) + tuple(self.engine.get_binding_shape(idx)) + shape = (batch_size,) + tuple(self.engine.get_binding_shape(idx)) device = torch_device_from_trt(self.engine.get_location(idx)) output = torch.empty(size=shape, dtype=dtype, device=device) outputs[i] = output @@ -334,79 +355,85 @@ def forward(self, *inputs): idx = self.engine.get_binding_index(input_name) bindings[idx] = inputs[i].data_ptr() - self.context.execute_async(batch_size, bindings, torch.cuda.current_stream().cuda_stream) + self.context.execute_async( + batch_size, bindings, torch.cuda.current_stream().cuda_stream + ) outputs = tuple(outputs) if len(outputs) == 1: outputs = outputs[0] return outputs - + def enable_profiling(self): if not self.context.profiler: self.context.profiler = trt.Profiler() -def torch2trt(module, - inputs, - input_names=None, - output_names=None, - log_level=trt.Logger.ERROR, - max_batch_size=1, - fp16_mode=False, - max_workspace_size=0, - strict_type_constraints=False, - keep_network=True, - int8_mode=False, - int8_calib_dataset=None, - int8_calib_algorithm=DEFAULT_CALIBRATION_ALGORITHM): +def torch2trt( + module, + inputs, + input_names=None, + output_names=None, + log_level=trt.Logger.ERROR, + max_batch_size=1, + fp16_mode=False, + max_workspace_size=0, + strict_type_constraints=False, + keep_network=True, 
+ int8_mode=False, + int8_calib_dataset=None, + int8_calib_algorithm=DEFAULT_CALIBRATION_ALGORITHM, +): inputs_in = inputs - + # copy inputs to avoid modifications to source data inputs = [tensor.clone()[0:1] for tensor in inputs] # only run single entry - + logger = trt.Logger(log_level) builder = trt.Builder(logger) network = builder.create_network() - + with ConversionContext(network) as ctx: if isinstance(inputs, list): inputs = tuple(inputs) if not isinstance(inputs, tuple): - inputs = (inputs, ) + inputs = (inputs,) ctx.add_inputs(inputs, input_names) outputs = module(*inputs) if not isinstance(outputs, tuple) and not isinstance(outputs, list): - outputs = (outputs, ) + outputs = (outputs,) ctx.mark_outputs(outputs, output_names) builder.max_workspace_size = max_workspace_size builder.fp16_mode = fp16_mode builder.max_batch_size = max_batch_size builder.strict_type_constraints = strict_type_constraints - + if int8_mode: - + # default to use input tensors for calibration if int8_calib_dataset is None: int8_calib_dataset = TensorBatchDataset(inputs_in) - + builder.int8_mode = True - + # @TODO(jwelsh): Should we set batch_size=max_batch_size? Need to investigate memory consumption - builder.int8_calibrator = DatasetCalibrator(inputs, int8_calib_dataset, batch_size=1, algorithm=int8_calib_algorithm) + builder.int8_calibrator = DatasetCalibrator( + inputs, int8_calib_dataset, batch_size=1, algorithm=int8_calib_algorithm + ) engine = builder.build_cuda_engine(network) - + module_trt = TRTModule(engine, ctx.input_names, ctx.output_names) - + if keep_network: module_trt.network = network - + return module_trt @@ -415,6 +442,7 @@ def torch2trt(module, def tensorrt_converter(method, is_real=True): def register_converter(converter): - CONVERTERS[method] = {'converter': converter, 'is_real': is_real} + CONVERTERS[method] = {"converter": converter, "is_real": is_real} return converter + return register_converter diff --git a/torch2trt/utils.py b/torch2trt/utils.py index c3423328..95fcd745 100644 --- a/torch2trt/utils.py +++ b/torch2trt/utils.py @@ -1,62 +1,67 @@ import graphviz +import tensorrt as trt def trt_network_to_dot_graph(network): - dot = graphviz.Digraph(comment='Network') - + dot = graphviz.Digraph(comment="Network") + # add nodes (layers) for i in range(network.num_layers): layer = network.get_layer(i) dot.node(layer.name) - + # add nodes (inputs) for i in range(network.num_inputs): dot.node(network.get_input(i).name) - + # add nodes (outputs) for i in range(network.num_outputs): dot.node(network.get_output(i).name) - + # add layer->layer edges for a in range(network.num_layers): layer_a = network.get_layer(a) - + for b in range(network.num_layers): layer_b = network.get_layer(b) - + for i in range(layer_a.num_outputs): output_i = layer_a.get_output(i) - + for j in range(layer_b.num_inputs): input_j = layer_b.get_input(j) - + if output_i == input_j: dot.edge(layer_a.name, layer_b.name, label=str(input_j.shape)) - + # add input->layer edges for i in range(network.num_inputs): input_i = network.get_input(i) - + for b in range(network.num_layers): layer_b = network.get_layer(b) - + for j in range(layer_b.num_inputs): input_j = layer_b.get_input(j) if input_i == input_j: dot.edge(input_i.name, layer_b.name, label=str(input_j.shape)) - + # add layer->output edges for i in range(network.num_outputs): input_i = network.get_output(i) - + for b in range(network.num_layers): layer_b = network.get_layer(b) - + for j in range(layer_b.num_outputs): input_j = layer_b.get_output(j) if input_i == 
input_j: dot.edge(layer_b.name, input_i.name, label=str(input_j.shape)) - - return dot \ No newline at end of file + + return dot + + +def get_trt_version(): + return float(trt.__version__[:3]) From 401be2e655f8549978a0aa750dfcb32dcef88bdb Mon Sep 17 00:00:00 2001 From: Kshitij Srivastava Date: Mon, 25 May 2020 19:27:04 -0400 Subject: [PATCH 273/355] separated trt 7 ops --- torch2trt/converters/__init__.py | 32 ++++++++++++------ torch2trt/converters/transpose.py | 7 ++-- torch2trt/converters/{ => trt7_ops}/Conv.py | 0 .../{ => trt7_ops}/ConvTranspose.py | 0 torch2trt/converters/trt7_ops/__init__.py | 0 .../converters/{ => trt7_ops}/avg_pool.py | 0 .../converters/{ => trt7_ops}/batch_norm.py | 0 .../converters/{ => trt7_ops}/compare.py | 0 torch2trt/converters/{ => trt7_ops}/stack.py | 0 torch2trt/converters/trt7_ops/transpose.py | 33 +++++++++++++++++++ torch2trt/torch2trt.py | 4 +-- 11 files changed, 61 insertions(+), 15 deletions(-) rename torch2trt/converters/{ => trt7_ops}/Conv.py (100%) rename torch2trt/converters/{ => trt7_ops}/ConvTranspose.py (100%) create mode 100644 torch2trt/converters/trt7_ops/__init__.py rename torch2trt/converters/{ => trt7_ops}/avg_pool.py (100%) rename torch2trt/converters/{ => trt7_ops}/batch_norm.py (100%) rename torch2trt/converters/{ => trt7_ops}/compare.py (100%) rename torch2trt/converters/{ => trt7_ops}/stack.py (100%) create mode 100644 torch2trt/converters/trt7_ops/transpose.py diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index e63fc48b..4a52b316 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -1,25 +1,22 @@ # dummy converters throw warnings method encountered import tensorrt as trt from .dummy_converters import * +from torch2trt.utils import get_trt_version # supported converters will override dummy converters +trt_version = get_trt_version() from .activation import * from .adaptive_avg_pool2d import * from .adaptive_max_pool2d import * from .AdaptiveAvgPool2d import * from .add import * -from .avg_pool import * from .mul import * -from .compare import * from .div import * from .BatchNorm1d import * -from .batch_norm import * from .cat import * from .clamp import * from .Conv1d import * -from .Conv import * -from .ConvTranspose import * from .getitem import * from .identity import * from .Identity import * @@ -40,23 +37,38 @@ from .relu6 import * from .ReLU6 import * from .sigmoid import * -from .stack import * from .sub import * from .sum import * from .view import * from .tanh import * -from .transpose import * from .mean import * from .softmax import * from .split import * from .chunk import * from .unary import * -if int(trt.__version__.split(".")[0]) >= 6: - print("using tensorrt version {}".format(trt.__version__.split(".")[0])) +## Some ops implementation has been changed based on trt version. 
+ +if trt_version < 7.0: ##TRT ops supported in trt 5 and 6 + from .avg_pool2d import * + from .BatchNorm2d import * + from .Conv2d import * + from .ConvTranspose2d import * + from .transpose import * + +if trt_version >= 7.0: + from .trt7_ops.avg_pool import * + from .trt7_ops.compare import * + from .trt7_ops.batch_norm import * + from .trt7_ops.Conv import * + from .trt7_ops.ConvTranspose import * + from .trt7_ops.stack import * + from .trt7_ops.transpose import * + +## Upsample op will be fixed in 7.1 , hence a special case +if trt_version >= 7.1: from .upsample import * else: - print("using tensorrt version {}".format(trt.__version__.split(".")[0])) try: from .interpolate import * except: diff --git a/torch2trt/converters/transpose.py b/torch2trt/converters/transpose.py index 2ea5f7bb..5aae616e 100644 --- a/torch2trt/converters/transpose.py +++ b/torch2trt/converters/transpose.py @@ -2,7 +2,7 @@ from torch2trt.module_test import add_module_test -@tensorrt_converter('torch.transpose') +@tensorrt_converter("torch.transpose") def convert_transpose(ctx): input = ctx.method_args[0] input_trt = trt_(ctx.network, input) @@ -23,11 +23,12 @@ def __init__(self, dim0, dim1): super(Transpose, self).__init__() self.dim0 = dim0 self.dim1 = dim1 + def forward(self, x): return torch.transpose(x, self.dim0, self.dim1).contiguous() -@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 3)]) +@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 3, 3)]) def test_transpose_12(): return Transpose(1, 2) diff --git a/torch2trt/converters/Conv.py b/torch2trt/converters/trt7_ops/Conv.py similarity index 100% rename from torch2trt/converters/Conv.py rename to torch2trt/converters/trt7_ops/Conv.py diff --git a/torch2trt/converters/ConvTranspose.py b/torch2trt/converters/trt7_ops/ConvTranspose.py similarity index 100% rename from torch2trt/converters/ConvTranspose.py rename to torch2trt/converters/trt7_ops/ConvTranspose.py diff --git a/torch2trt/converters/trt7_ops/__init__.py b/torch2trt/converters/trt7_ops/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/torch2trt/converters/avg_pool.py b/torch2trt/converters/trt7_ops/avg_pool.py similarity index 100% rename from torch2trt/converters/avg_pool.py rename to torch2trt/converters/trt7_ops/avg_pool.py diff --git a/torch2trt/converters/batch_norm.py b/torch2trt/converters/trt7_ops/batch_norm.py similarity index 100% rename from torch2trt/converters/batch_norm.py rename to torch2trt/converters/trt7_ops/batch_norm.py diff --git a/torch2trt/converters/compare.py b/torch2trt/converters/trt7_ops/compare.py similarity index 100% rename from torch2trt/converters/compare.py rename to torch2trt/converters/trt7_ops/compare.py diff --git a/torch2trt/converters/stack.py b/torch2trt/converters/trt7_ops/stack.py similarity index 100% rename from torch2trt/converters/stack.py rename to torch2trt/converters/trt7_ops/stack.py diff --git a/torch2trt/converters/trt7_ops/transpose.py b/torch2trt/converters/trt7_ops/transpose.py new file mode 100644 index 00000000..2ea5f7bb --- /dev/null +++ b/torch2trt/converters/trt7_ops/transpose.py @@ -0,0 +1,33 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +@tensorrt_converter('torch.transpose') +def convert_transpose(ctx): + input = ctx.method_args[0] + input_trt = trt_(ctx.network, input) + output = ctx.method_return + # 
permutation -1 because TRT does not include batch dim + permutation = list(range(len(input.shape) - 1)) + dim0 = ctx.method_args[1] - 1 + dim1 = ctx.method_args[2] - 1 + permutation[dim0] = dim1 + permutation[dim1] = dim0 + layer = ctx.network.add_shuffle(input_trt) + layer.second_transpose = tuple(permutation) + output._trt = layer.get_output(0) + + +class Transpose(torch.nn.Module): + def __init__(self, dim0, dim1): + super(Transpose, self).__init__() + self.dim0 = dim0 + self.dim1 = dim1 + def forward(self, x): + return torch.transpose(x, self.dim0, self.dim1).contiguous() + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +def test_transpose_12(): + return Transpose(1, 2) diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index feda7331..7247cb61 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -14,7 +14,7 @@ def torch_dtype_to_trt(dtype): - if trt_version >= 7.1 and dtype == torch.bool: + if trt_version >= 7.0 and dtype == torch.bool: return trt.bool if dtype == torch.int8: return trt.int8 @@ -31,7 +31,7 @@ def torch_dtype_to_trt(dtype): def torch_dtype_from_trt(dtype): if dtype == trt.int8: return torch.int8 - if trt_version >= 7.1 and dtype == trt.bool: + if trt_version >= 7.0 and dtype == trt.bool: return torch.bool elif dtype == trt.int32: return torch.int32 From 8cb3606ed43312d16c07f583a1ee42b1f584947a Mon Sep 17 00:00:00 2001 From: Kshitij Srivastava Date: Mon, 25 May 2020 19:27:39 -0400 Subject: [PATCH 274/355] added original ops --- torch2trt/converters/BatchNorm2d.py | 23 +++++++++ torch2trt/converters/Conv2d.py | 68 +++++++++++++++++++++++++ torch2trt/converters/ConvTranspose2d.py | 42 +++++++++++++++ torch2trt/converters/avg_pool2d.py | 57 +++++++++++++++++++++ 4 files changed, 190 insertions(+) create mode 100644 torch2trt/converters/BatchNorm2d.py create mode 100644 torch2trt/converters/Conv2d.py create mode 100644 torch2trt/converters/ConvTranspose2d.py create mode 100644 torch2trt/converters/avg_pool2d.py diff --git a/torch2trt/converters/BatchNorm2d.py b/torch2trt/converters/BatchNorm2d.py new file mode 100644 index 00000000..1e390a5b --- /dev/null +++ b/torch2trt/converters/BatchNorm2d.py @@ -0,0 +1,23 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +@tensorrt_converter("torch.nn.BatchNorm2d.forward") +def convert_BatchNorm2d(ctx): + module = ctx.method_args[0] + input = ctx.method_args[1] + input_trt = trt_(ctx.network, input) + output = ctx.method_return + + scale = module.weight.detach().cpu().numpy() / np.sqrt( + module.running_var.detach().cpu().numpy() + module.eps + ) + bias = ( + module.bias.detach().cpu().numpy() + - module.running_mean.detach().cpu().numpy() * scale + ) + power = np.ones_like(scale) + + layer = ctx.network.add_scale(input_trt, trt.ScaleMode.CHANNEL, bias, scale, power) + + output._trt = layer.get_output(0) diff --git a/torch2trt/converters/Conv2d.py b/torch2trt/converters/Conv2d.py new file mode 100644 index 00000000..613880b1 --- /dev/null +++ b/torch2trt/converters/Conv2d.py @@ -0,0 +1,68 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +@tensorrt_converter("torch.nn.Conv2d.forward") +def convert_Conv2d(ctx): + module = ctx.method_args[0] + input = ctx.method_args[1] + input_trt = trt_(ctx.network, input) + output = ctx.method_return + + kernel_size = module.kernel_size + if not isinstance(kernel_size, tuple): + kernel_size = 
(kernel_size,) * 2 + + stride = module.stride + if not isinstance(stride, tuple): + stride = (stride,) * 2 + + padding = module.padding + if not isinstance(padding, tuple): + padding = (padding,) * 2 + + dilation = module.dilation + if not isinstance(dilation, tuple): + dilation = (dilation,) * 2 + + kernel = module.weight.detach().cpu().numpy() + + bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype)) + if module.bias is not None: + bias = module.bias.detach().cpu().numpy() + + layer = ctx.network.add_convolution( + input=input_trt, + num_output_maps=module.out_channels, + kernel_shape=kernel_size, + kernel=kernel, + bias=bias, + ) + layer.stride = stride + layer.padding = padding + layer.dilation = dilation + + if module.groups is not None: + layer.num_groups = module.groups + + output._trt = layer.get_output(0) + + +@add_module_test(torch.float32, torch.device("cuda"), [(1, 10, 224, 224)]) +def test_Conv2d_basic(): + return torch.nn.Conv2d(10, 5, kernel_size=1, stride=1, padding=0) + + +@add_module_test(torch.float32, torch.device("cuda"), [(1, 10, 224, 224)]) +def test_Conv2d_stride2(): + return torch.nn.Conv2d(10, 5, kernel_size=1, stride=2, padding=0) + + +@add_module_test(torch.float32, torch.device("cuda"), [(1, 10, 224, 224)]) +def test_Conv2d_kernel3(): + return torch.nn.Conv2d(10, 5, kernel_size=3, stride=2, padding=1) + + +@add_module_test(torch.float32, torch.device("cuda"), [(1, 10, 224, 224)]) +def test_Conv2d_dilation2(): + return torch.nn.Conv2d(10, 5, kernel_size=3, stride=1, padding=1, dilation=2) diff --git a/torch2trt/converters/ConvTranspose2d.py b/torch2trt/converters/ConvTranspose2d.py new file mode 100644 index 00000000..c8697010 --- /dev/null +++ b/torch2trt/converters/ConvTranspose2d.py @@ -0,0 +1,42 @@ +from torch2trt.torch2trt import * + + +@tensorrt_converter("torch.nn.ConvTranspose2d.forward") +def convert_ConvTranspose2d(ctx): + module = ctx.method_args[0] + input = ctx.method_args[1] + input_trt = trt_(ctx.network, input) + output = ctx.method_return + + kernel_size = module.kernel_size + if not isinstance(kernel_size, tuple): + kernel_size = (kernel_size,) * 2 + + stride = module.stride + if not isinstance(stride, tuple): + stride = (stride,) * 2 + + padding = module.padding + if not isinstance(padding, tuple): + padding = (padding,) * 2 + + kernel = module.weight.detach().cpu().numpy() + + bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype)) + if module.bias is not None: + bias = module.bias.detach().cpu().numpy() + + layer = ctx.network.add_deconvolution( + input=input_trt, + num_output_maps=module.out_channels, + kernel_shape=kernel_size, + kernel=kernel, + bias=bias, + ) + layer.stride = stride + layer.padding = padding + + if module.groups is not None: + layer.num_groups = module.groups + + output._trt = layer.get_output(0) diff --git a/torch2trt/converters/avg_pool2d.py b/torch2trt/converters/avg_pool2d.py new file mode 100644 index 00000000..e4d243c6 --- /dev/null +++ b/torch2trt/converters/avg_pool2d.py @@ -0,0 +1,57 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +@tensorrt_converter("torch.nn.functional.avg_pool2d") +def convert_avg_pool2d(ctx): + # parse args + input = get_arg(ctx, "input", pos=0, default=None) + kernel_size = get_arg(ctx, "kernel_size", pos=1, default=None) + stride = get_arg(ctx, "stride", pos=2, default=None) + padding = get_arg(ctx, "padding", pos=3, default=0) + ceil_mode = get_arg(ctx, "ceil_mode", pos=4, default=False) + count_include_pad = get_arg(ctx, 
"count_include_pad", pos=5, default=True) + + # get input trt tensor (or create constant if it doesn't exist) + input_trt = trt_(ctx.network, input) + + output = ctx.method_return + + # get kernel size + if not isinstance(kernel_size, tuple): + kernel_size = (kernel_size,) * 2 + + # get stride + if not isinstance(stride, tuple): + stride = (stride,) * 2 + + # get padding + if not isinstance(padding, tuple): + padding = (padding,) * 2 + + layer = ctx.network.add_pooling( + input=input_trt, type=trt.PoolingType.AVERAGE, window_size=kernel_size + ) + + layer.stride = stride + layer.padding = padding + layer.average_count_excludes_padding = not count_include_pad + + if ceil_mode: + layer.padding_mode = trt.PaddingMode.EXPLICIT_ROUND_UP + + output._trt = layer.get_output(0) + + +@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 4, 6)]) +@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 5, 7)]) +def test_avg_pool2d_without_ceil_mode(): + return torch.nn.AvgPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=False) + + +@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 4, 6)]) +@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 5, 7)]) +def test_avg_pool2d_with_ceil_mode(): + return torch.nn.AvgPool2d( + kernel_size=3, stride=2, padding=1, ceil_mode=True, count_include_pad=False + ) # TRT does not support ceil_mode=True && count_include_pad=True From 3ce13715fa310cac5cab758df8b77646a4d357f6 Mon Sep 17 00:00:00 2001 From: Kshitij Srivastava Date: Mon, 25 May 2020 19:54:47 -0400 Subject: [PATCH 275/355] reverting to default for trt 7 --- torch2trt/torch2trt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index 7247cb61..b9b06a73 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -14,7 +14,7 @@ def torch_dtype_to_trt(dtype): - if trt_version >= 7.0 and dtype == torch.bool: + if trt_version >= 7 and dtype == torch.bool: return trt.bool if dtype == torch.int8: return trt.int8 From af2674cde2d1cf08535c51f703abc2d8bd80aac8 Mon Sep 17 00:00:00 2001 From: Kshitij Srivastava Date: Mon, 25 May 2020 21:24:23 -0400 Subject: [PATCH 276/355] changed value from 7 to 7.0 --- torch2trt/torch2trt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index b9b06a73..7247cb61 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -14,7 +14,7 @@ def torch_dtype_to_trt(dtype): - if trt_version >= 7 and dtype == torch.bool: + if trt_version >= 7.0 and dtype == torch.bool: return trt.bool if dtype == torch.int8: return trt.int8 From 3c16ea5373809efa283b8cd2f2a0f2d29de30d11 Mon Sep 17 00:00:00 2001 From: Kshitij Srivastava Date: Wed, 3 Jun 2020 20:21:47 -0400 Subject: [PATCH 277/355] added unit test for ConvTranspose2d and fixed an indentation error in parent PR --- torch2trt/converters/ConvTranspose2d.py | 19 +++++++++++++++++-- torch2trt/torch2trt.py | 2 +- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/torch2trt/converters/ConvTranspose2d.py b/torch2trt/converters/ConvTranspose2d.py index e65a62c6..058b4ef2 100644 --- a/torch2trt/converters/ConvTranspose2d.py +++ b/torch2trt/converters/ConvTranspose2d.py @@ -1,5 +1,5 @@ from torch2trt.torch2trt import * - +from torch2trt.module_test import add_module_test @tensorrt_converter('torch.nn.ConvTranspose2d.forward') def convert_ConvTranspose2d(ctx): @@ -40,9 +40,24 @@ def convert_ConvTranspose2d(ctx): layer.pre_padding = padding layer.post_padding = 
trt.tensorrt.DimsHW(padding[0] - output_padding[0], padding[1] - output_padding[1]) else: - layer.padding = padding + layer.padding = padding if module.groups is not None: layer.num_groups = module.groups output._trt = layer.get_output(0) + +@add_module_test(torch.float32, torch.device("cuda"), [(1,3,224,224)]) +def test_square_kernel_equal_stride_mode(): + return torch.nn.ConvTranspose2d(3,3,3,stride=2) + +@add_module_test(torch.float32, torch.device("cuda"), [(1,3,224,224)]) +def test_square_kernel_equal_stride_mode_unequal_op_size(): + return torch.nn.ConvTranspose2d(3,6,3,stride=2) + +@add_module_test(torch.float32, torch.device("cuda"), [(1,3,224,224)]) +def test_unequal_stride_mode(): + return torch.nn.ConvTranspose2d(3,3,3, stride=(2,1), padding=(4,2)) + + + diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index bd860ab0..763d22f6 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -350,7 +350,7 @@ def torch2trt(module, log_level=trt.Logger.ERROR, max_batch_size=1, fp16_mode=False, - max_workspace_size=0, + max_workspace_size=1<<25, strict_type_constraints=False, keep_network=True, int8_mode=False, From 512a511de553f5c11f980050561595d1d58e77cd Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 4 Jun 2020 14:43:22 -0700 Subject: [PATCH 278/355] added testcase for commonly used kernel size 4, stride 2, padding 1 convtranspose configuration --- torch2trt/converters/ConvTranspose2d.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/torch2trt/converters/ConvTranspose2d.py b/torch2trt/converters/ConvTranspose2d.py index 058b4ef2..6b74f35c 100644 --- a/torch2trt/converters/ConvTranspose2d.py +++ b/torch2trt/converters/ConvTranspose2d.py @@ -59,5 +59,7 @@ def test_square_kernel_equal_stride_mode_unequal_op_size(): def test_unequal_stride_mode(): return torch.nn.ConvTranspose2d(3,3,3, stride=(2,1), padding=(4,2)) - - +@add_module_test(torch.float32, torch.device("cuda"), [(1,3,112,112)]) +@add_module_test(torch.float32, torch.device("cuda"), [(1,3,7,7)]) +def test_kernelsize_4(): + return torch.nn.ConvTranspose2d(3,3,4, stride=2, padding=1) From b2c8c1360d4d313b233ecea4d74d25f3b633de75 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 9 Jun 2020 13:35:29 -0700 Subject: [PATCH 279/355] added enabled kwarg to register_converter and add_module_test --- torch2trt/converters/BatchNorm2d.py | 2 +- torch2trt/converters/Conv2d.py | 10 ++--- torch2trt/converters/ConvTranspose2d.py | 12 ++--- torch2trt/converters/__init__.py | 45 +++++++++---------- torch2trt/converters/avg_pool2d.py | 10 ++--- torch2trt/converters/interpolate.py | 14 +++--- torch2trt/converters/transpose.py | 6 +-- torch2trt/converters/trt7_ops/Conv.py | 40 ++++++++--------- .../converters/trt7_ops/ConvTranspose.py | 30 ++++++------- torch2trt/converters/trt7_ops/avg_pool.py | 30 ++++++------- torch2trt/converters/trt7_ops/batch_norm.py | 18 ++++---- torch2trt/converters/trt7_ops/compare.py | 18 ++++---- torch2trt/converters/trt7_ops/stack.py | 14 +++--- torch2trt/converters/trt7_ops/transpose.py | 10 ++--- torch2trt/converters/upsample.py | 38 ++++++++-------- torch2trt/module_test.py | 13 +++++- torch2trt/torch2trt.py | 24 +++++++--- torch2trt/utils.py | 4 -- 18 files changed, 175 insertions(+), 163 deletions(-) diff --git a/torch2trt/converters/BatchNorm2d.py b/torch2trt/converters/BatchNorm2d.py index 1e390a5b..b13349dd 100644 --- a/torch2trt/converters/BatchNorm2d.py +++ b/torch2trt/converters/BatchNorm2d.py @@ -2,7 +2,7 @@ from torch2trt.module_test import add_module_test 
-@tensorrt_converter("torch.nn.BatchNorm2d.forward") +@tensorrt_converter("torch.nn.BatchNorm2d.forward", enabled=trt_version() < '7.0') def convert_BatchNorm2d(ctx): module = ctx.method_args[0] input = ctx.method_args[1] diff --git a/torch2trt/converters/Conv2d.py b/torch2trt/converters/Conv2d.py index 613880b1..40af1cdc 100644 --- a/torch2trt/converters/Conv2d.py +++ b/torch2trt/converters/Conv2d.py @@ -2,7 +2,7 @@ from torch2trt.module_test import add_module_test -@tensorrt_converter("torch.nn.Conv2d.forward") +@tensorrt_converter("torch.nn.Conv2d.forward", enabled=trt_version() < '7.0') def convert_Conv2d(ctx): module = ctx.method_args[0] input = ctx.method_args[1] @@ -48,21 +48,21 @@ def convert_Conv2d(ctx): output._trt = layer.get_output(0) -@add_module_test(torch.float32, torch.device("cuda"), [(1, 10, 224, 224)]) +@add_module_test(torch.float32, torch.device("cuda"), [(1, 10, 224, 224)], enabled=trt_version() < '7.0') def test_Conv2d_basic(): return torch.nn.Conv2d(10, 5, kernel_size=1, stride=1, padding=0) -@add_module_test(torch.float32, torch.device("cuda"), [(1, 10, 224, 224)]) +@add_module_test(torch.float32, torch.device("cuda"), [(1, 10, 224, 224)], enabled=trt_version() < '7.0') def test_Conv2d_stride2(): return torch.nn.Conv2d(10, 5, kernel_size=1, stride=2, padding=0) -@add_module_test(torch.float32, torch.device("cuda"), [(1, 10, 224, 224)]) +@add_module_test(torch.float32, torch.device("cuda"), [(1, 10, 224, 224)], enabled=trt_version() < '7.0') def test_Conv2d_kernel3(): return torch.nn.Conv2d(10, 5, kernel_size=3, stride=2, padding=1) -@add_module_test(torch.float32, torch.device("cuda"), [(1, 10, 224, 224)]) +@add_module_test(torch.float32, torch.device("cuda"), [(1, 10, 224, 224)], enabled=trt_version() < '7.0') def test_Conv2d_dilation2(): return torch.nn.Conv2d(10, 5, kernel_size=3, stride=1, padding=1, dilation=2) diff --git a/torch2trt/converters/ConvTranspose2d.py b/torch2trt/converters/ConvTranspose2d.py index 5067d7b0..05dee7fc 100644 --- a/torch2trt/converters/ConvTranspose2d.py +++ b/torch2trt/converters/ConvTranspose2d.py @@ -1,7 +1,7 @@ from torch2trt.torch2trt import * from torch2trt.module_test import add_module_test -@tensorrt_converter("torch.nn.ConvTranspose2d.forward") +@tensorrt_converter("torch.nn.ConvTranspose2d.forward", enabled=trt_version() < '7.0') def convert_ConvTranspose2d(ctx): module = ctx.method_args[0] input = ctx.method_args[1] @@ -49,20 +49,20 @@ def convert_ConvTranspose2d(ctx): output._trt = layer.get_output(0) -@add_module_test(torch.float32, torch.device("cuda"), [(1,3,224,224)]) +@add_module_test(torch.float32, torch.device("cuda"), [(1,3,224,224)], enabled=trt_version() < '7.0') def test_square_kernel_equal_stride_mode(): return torch.nn.ConvTranspose2d(3,3,3,stride=2) -@add_module_test(torch.float32, torch.device("cuda"), [(1,3,224,224)]) +@add_module_test(torch.float32, torch.device("cuda"), [(1,3,224,224)], enabled=trt_version() < '7.0') def test_square_kernel_equal_stride_mode_unequal_op_size(): return torch.nn.ConvTranspose2d(3,6,3,stride=2) -@add_module_test(torch.float32, torch.device("cuda"), [(1,3,224,224)]) +@add_module_test(torch.float32, torch.device("cuda"), [(1,3,224,224)], enabled=trt_version() < '7.0') def test_unequal_stride_mode(): return torch.nn.ConvTranspose2d(3,3,3, stride=(2,1), padding=(4,2)) -@add_module_test(torch.float32, torch.device("cuda"), [(1,3,112,112)]) -@add_module_test(torch.float32, torch.device("cuda"), [(1,3,7,7)]) +@add_module_test(torch.float32, torch.device("cuda"), [(1,3,112,112)], 
enabled=trt_version() < '7.0') +@add_module_test(torch.float32, torch.device("cuda"), [(1,3,7,7)], enabled=trt_version() < '7.0') def test_kernelsize_4(): return torch.nn.ConvTranspose2d(3,3,4, stride=2, padding=1) diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index 4a52b316..1ca1a416 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -1,10 +1,8 @@ # dummy converters throw warnings method encountered import tensorrt as trt from .dummy_converters import * -from torch2trt.utils import get_trt_version # supported converters will override dummy converters -trt_version = get_trt_version() from .activation import * from .adaptive_avg_pool2d import * @@ -47,29 +45,26 @@ from .chunk import * from .unary import * -## Some ops implementation has been changed based on trt version. +# trt < 7.0 +from .avg_pool2d import * +from .BatchNorm2d import * +from .Conv2d import * +from .ConvTranspose2d import * +from .transpose import * -if trt_version < 7.0: ##TRT ops supported in trt 5 and 6 - from .avg_pool2d import * - from .BatchNorm2d import * - from .Conv2d import * - from .ConvTranspose2d import * - from .transpose import * +# trt >= 7.0 +from .trt7_ops.avg_pool import * +from .trt7_ops.compare import * +from .trt7_ops.batch_norm import * +from .trt7_ops.Conv import * +from .trt7_ops.ConvTranspose import * +from .trt7_ops.stack import * +from .trt7_ops.transpose import * -if trt_version >= 7.0: - from .trt7_ops.avg_pool import * - from .trt7_ops.compare import * - from .trt7_ops.batch_norm import * - from .trt7_ops.Conv import * - from .trt7_ops.ConvTranspose import * - from .trt7_ops.stack import * - from .trt7_ops.transpose import * +# trt >= 7.1 +from .upsample import * -## Upsample op will be fixed in 7.1 , hence a special case -if trt_version >= 7.1: - from .upsample import * -else: - try: - from .interpolate import * - except: - pass +try: + from .interpolate import * +except: + pass diff --git a/torch2trt/converters/avg_pool2d.py b/torch2trt/converters/avg_pool2d.py index e4d243c6..2b359412 100644 --- a/torch2trt/converters/avg_pool2d.py +++ b/torch2trt/converters/avg_pool2d.py @@ -2,7 +2,7 @@ from torch2trt.module_test import add_module_test -@tensorrt_converter("torch.nn.functional.avg_pool2d") +@tensorrt_converter("torch.nn.functional.avg_pool2d", enabled=trt_version() < '7.0') def convert_avg_pool2d(ctx): # parse args input = get_arg(ctx, "input", pos=0, default=None) @@ -43,14 +43,14 @@ def convert_avg_pool2d(ctx): output._trt = layer.get_output(0) -@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 4, 6)]) -@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 5, 7)]) +@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 4, 6)], enabled=trt_version() < '7.0') +@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 5, 7)], enabled=trt_version() < '7.0') def test_avg_pool2d_without_ceil_mode(): return torch.nn.AvgPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=False) -@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 4, 6)]) -@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 5, 7)]) +@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 4, 6)], enabled=trt_version() < '7.0') +@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 5, 7)], enabled=trt_version() < '7.0') def test_avg_pool2d_with_ceil_mode(): return torch.nn.AvgPool2d( kernel_size=3, stride=2, padding=1, ceil_mode=True, count_include_pad=False diff --git 
a/torch2trt/converters/interpolate.py b/torch2trt/converters/interpolate.py index 75674d5e..c1dff1ef 100644 --- a/torch2trt/converters/interpolate.py +++ b/torch2trt/converters/interpolate.py @@ -14,7 +14,7 @@ def get_interpolate_plugin(size, mode, align_corners): return creator.deserialize_plugin(PLUGIN_NAME, torch2trt_plugin.serializeToString()) -@tensorrt_converter('torch.nn.functional.interpolate') +@tensorrt_converter('torch.nn.functional.interpolate', enabled=trt_version() < '7.1') def convert_interpolate(ctx): input = ctx.method_args[0] input_trt = trt_(ctx.network, input) @@ -52,25 +52,25 @@ def forward(self, x): return F.interpolate(x, self.size, mode=self.mode, align_corners=self.align_corners) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1') def test_interpolate_nearest(): return Interpolate((224, 224), 'nearest', None) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1') def test_interpolate_bilinear(): return Interpolate((224, 224), 'bilinear', False) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1') def test_interpolate_bicubic(): return Interpolate((224, 224), 'bicubic', False) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1') def test_interpolate_area(): return Interpolate((56, 56), 'area', None) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1') def test_upsample_scale_factor2(): - return nn.Upsample(scale_factor=2, mode='bilinear',align_corners=False) \ No newline at end of file + return nn.Upsample(scale_factor=2, mode='bilinear',align_corners=False) diff --git a/torch2trt/converters/transpose.py b/torch2trt/converters/transpose.py index 5aae616e..4df56686 100644 --- a/torch2trt/converters/transpose.py +++ b/torch2trt/converters/transpose.py @@ -2,7 +2,7 @@ from torch2trt.module_test import add_module_test -@tensorrt_converter("torch.transpose") +@tensorrt_converter("torch.transpose", enabled=trt_version() < '7.0') def convert_transpose(ctx): input = ctx.method_args[0] input_trt = trt_(ctx.network, input) @@ -28,7 +28,7 @@ def forward(self, x): return torch.transpose(x, self.dim0, self.dim1).contiguous() -@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 3)]) -@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 3, 3)]) +@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 3)], enabled=trt_version() < '7.0') +@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 3, 3)], enabled=trt_version() < '7.0') def test_transpose_12(): return Transpose(1, 2) diff --git a/torch2trt/converters/trt7_ops/Conv.py b/torch2trt/converters/trt7_ops/Conv.py index 204d002e..95cf7bde 100644 --- a/torch2trt/converters/trt7_ops/Conv.py +++ b/torch2trt/converters/trt7_ops/Conv.py @@ -2,9 +2,9 @@ from torch2trt.module_test import add_module_test -@tensorrt_converter('torch.nn.Conv2d.forward') -@tensorrt_converter('torch.nn.Conv3d.forward') -def convert_Conv2d(ctx): 
+@tensorrt_converter('torch.nn.Conv2d.forward', enabled=trt_version() >= '7.0') +@tensorrt_converter('torch.nn.Conv3d.forward', enabled=trt_version() >= '7.0') +def convert_Conv_trt7(ctx): module = ctx.method_args[0] input = ctx.method_args[1] input_trt = trt_(ctx.network, input) @@ -51,41 +51,41 @@ def convert_Conv2d(ctx): -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)]) -def test_Conv2d_basic(): +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)], enabled=trt_version() >= '7.0') +def test_Conv2d_basic_trt7(): return torch.nn.Conv2d(10, 5, kernel_size=1, stride=1, padding=0) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)]) -def test_Conv2d_stride2(): +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)], enabled=trt_version() >= '7.0') +def test_Conv2d_stride2_trt7(): return torch.nn.Conv2d(10, 5, kernel_size=1, stride=2, padding=0) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)]) -def test_Conv2d_kernel3(): +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)], enabled=trt_version() >= '7.0') +def test_Conv2d_kernel3_trt7(): return torch.nn.Conv2d(10, 5, kernel_size=3, stride=2, padding=1) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)]) -def test_Conv2d_dilation2(): +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)], enabled=trt_version() >= '7.0') +def test_Conv2d_dilation2_trt7(): return torch.nn.Conv2d(10, 5, kernel_size=3, stride=1, padding=1, dilation=2) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 64, 64, 64)]) -def test_Conv3d_basic(): +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 64, 64, 64)], enabled=trt_version() >= '7.0') +def test_Conv3d_basic_trt7(): return torch.nn.Conv3d(10, 5, kernel_size=1, stride=1, padding=0) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 64, 64, 64)]) -def test_Conv3d_stride2(): +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 64, 64, 64)], enabled=trt_version() >= '7.0') +def test_Conv3d_stride2_trt7(): return torch.nn.Conv3d(10, 5, kernel_size=1, stride=2, padding=0) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 64, 64, 64)]) -def test_Conv3d_kernel3(): +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 64, 64, 64)], enabled=trt_version() >= '7.0') +def test_Conv3d_kernel3_trt7(): return torch.nn.Conv3d(10, 5, kernel_size=3, stride=2, padding=1) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 64, 64, 64)]) -def test_Conv3d_dilation2(): - return torch.nn.Conv3d(10, 5, kernel_size=3, stride=1, padding=1, dilation=2) \ No newline at end of file +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 64, 64, 64)], enabled=trt_version() >= '7.0') +def test_Conv3d_dilation2_trt7(): + return torch.nn.Conv3d(10, 5, kernel_size=3, stride=1, padding=1, dilation=2) diff --git a/torch2trt/converters/trt7_ops/ConvTranspose.py b/torch2trt/converters/trt7_ops/ConvTranspose.py index dbceb43b..1f88d46a 100644 --- a/torch2trt/converters/trt7_ops/ConvTranspose.py +++ b/torch2trt/converters/trt7_ops/ConvTranspose.py @@ -2,9 +2,9 @@ from torch2trt.module_test import add_module_test -@tensorrt_converter('torch.nn.ConvTranspose2d.forward') -@tensorrt_converter('torch.nn.ConvTranspose3d.forward') -def convert_ConvTranspose2d(ctx): +@tensorrt_converter('torch.nn.ConvTranspose2d.forward', enabled=trt_version() >= '7.0') 
+@tensorrt_converter('torch.nn.ConvTranspose3d.forward', enabled=trt_version() >= '7.0') +def convert_ConvTranspose2d_trt7(ctx): module = ctx.method_args[0] input = ctx.method_args[1] input_trt = trt_(ctx.network, input) @@ -48,32 +48,32 @@ def convert_ConvTranspose2d(ctx): output._trt = layer.get_output(0) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 7, 7)]) -def test_ConvTranspose2d_basic(): +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 7, 7)], enabled=trt_version() >= '7.0') +def test_ConvTranspose2d_basic_trt7(): return torch.nn.ConvTranspose2d(10, 5, kernel_size=1, stride=1, padding=0) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 8, 8)]) -def test_ConvTranspose2d_stride2(): +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 8, 8)], enabled=trt_version() >= '7.0') +def test_ConvTranspose2d_stride2_trt7(): return torch.nn.ConvTranspose2d(10, 5, kernel_size=1, stride=2, padding=0) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 9, 9)]) -def test_ConvTranspose2d_kernel3(): +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 9, 9)], enabled=trt_version() >= '7.0') +def test_ConvTranspose2d_kernel3_trt7(): return torch.nn.ConvTranspose2d(10, 5, kernel_size=3, stride=2, padding=1) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 7, 7, 7)]) -def test_ConvTranspose3d_basic(): +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 7, 7, 7)], enabled=trt_version() >= '7.0') +def test_ConvTranspose3d_basic_trt7(): return torch.nn.ConvTranspose3d(10, 5, kernel_size=1, stride=1, padding=0) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 7, 7, 7)]) -def test_ConvTranspose3d_stride2(): +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 7, 7, 7)], enabled=trt_version() >= '7.0') +def test_ConvTranspose3d_stride2_trt7(): return torch.nn.ConvTranspose3d(10, 5, kernel_size=1, stride=2, padding=0) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 6, 6, 6)]) -def test_ConvTranspose3d_kernel3(): +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 6, 6, 6)], enabled=trt_version() >= '7.0') +def test_ConvTranspose3d_kernel3_trt7(): return torch.nn.ConvTranspose3d(10, 5, kernel_size=3, stride=2, padding=1) diff --git a/torch2trt/converters/trt7_ops/avg_pool.py b/torch2trt/converters/trt7_ops/avg_pool.py index 1db9133d..6219aaad 100644 --- a/torch2trt/converters/trt7_ops/avg_pool.py +++ b/torch2trt/converters/trt7_ops/avg_pool.py @@ -2,9 +2,9 @@ from torch2trt.module_test import add_module_test -@tensorrt_converter('torch.nn.functional.avg_pool2d') -@tensorrt_converter('torch.nn.functional.avg_pool3d') -def convert_avg_pool(ctx): +@tensorrt_converter('torch.nn.functional.avg_pool2d', enabled=trt_version() >= '7.0') +@tensorrt_converter('torch.nn.functional.avg_pool3d', enabled=trt_version() >= '7.0') +def convert_avg_pool_trt7(ctx): # parse args input = get_arg(ctx, 'input', pos=0, default=None) kernel_size = get_arg(ctx, 'kernel_size', pos=1, default=None) @@ -44,25 +44,25 @@ def convert_avg_pool(ctx): output._trt = layer.get_output(0) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 6)]) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 5, 7)]) -def test_avg_pool2d_without_ceil_mode(): +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 6)], enabled=trt_version() >= '7.0') +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 5, 7)], enabled=trt_version() >= '7.0') +def 
test_avg_pool2d_without_ceil_mode_trt7(): return torch.nn.AvgPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=False) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 6)]) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 5, 7)]) -def test_avg_pool2d_with_ceil_mode(): +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 6)], enabled=trt_version() >= '7.0') +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 5, 7)], enabled=trt_version() >= '7.0') +def test_avg_pool2d_with_ceil_mode_trt7(): return torch.nn.AvgPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True, count_include_pad=False) # TRT does not support ceil_mode=True && count_include_pad=True -@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 4, 6)]) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 5, 7)]) -def test_avg_pool3d_without_ceil_mode(): +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 4, 6)], enabled=trt_version() >= '7.0') +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 5, 7)], enabled=trt_version() >= '7.0') +def test_avg_pool3d_without_ceil_mode_trt7(): return torch.nn.AvgPool3d(kernel_size=3, stride=2, padding=1, ceil_mode=False) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 4, 6)]) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 5, 7)]) -def test_avg_pool3d_with_ceil_mode(): +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 4, 6)], enabled=trt_version() >= '7.0') +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 5, 7)], enabled=trt_version() >= '7.0') +def test_avg_pool3d_with_ceil_mode_trt7(): return torch.nn.AvgPool3d(kernel_size=3, stride=2, padding=1, ceil_mode=True, count_include_pad=False) # TRT does not support ceil_mode=True && count_include_pad=True diff --git a/torch2trt/converters/trt7_ops/batch_norm.py b/torch2trt/converters/trt7_ops/batch_norm.py index 5af8ac6c..50ba62f6 100644 --- a/torch2trt/converters/trt7_ops/batch_norm.py +++ b/torch2trt/converters/trt7_ops/batch_norm.py @@ -1,8 +1,8 @@ from torch2trt.torch2trt import * from torch2trt.module_test import add_module_test -@tensorrt_converter('torch.nn.functional.batch_norm') -def convert_batch_norm(ctx): +@tensorrt_converter('torch.nn.functional.batch_norm', enabled=trt_version() >= '7.0') +def convert_batch_norm_trt7(ctx): input = get_arg(ctx, 'input', pos=0, default=None) running_mean = get_arg(ctx, 'running_mean', pos=1, default=None) @@ -24,17 +24,17 @@ def convert_batch_norm(ctx): -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3)]) -def test_batch_norm_2d(): +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3)], enabled=trt_version() >= '7.0') +def test_batch_norm_2d_trt7(): return torch.nn.BatchNorm2d(10) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3, 3)]) -def test_batch_norm_3d_2(): +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3, 3)], enabled=trt_version() >= '7.0') +def test_batch_norm_3d_2_trt7(): return torch.nn.BatchNorm3d(10) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 32, 2, 36, 47)]) -def test_batch_norm_3d(): +@add_module_test(torch.float32, torch.device('cuda'), [(1, 32, 2, 36, 47)], enabled=trt_version() >= '7.0') +def test_batch_norm_3d_trt7(): return torch.nn.BatchNorm3d(32) - \ No newline at end of file + diff --git a/torch2trt/converters/trt7_ops/compare.py b/torch2trt/converters/trt7_ops/compare.py index f57b9a42..aa152ec2 100644 --- 
a/torch2trt/converters/trt7_ops/compare.py
+++ b/torch2trt/converters/trt7_ops/compare.py
@@ -9,18 +9,18 @@ def convert_elementwise(ctx, op):
     layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, op)
     output._trt = layer.get_output(0)
 
-@tensorrt_converter('torch.gt')
-@tensorrt_converter('torch.Tensor.__gt__')
+@tensorrt_converter('torch.gt', enabled=trt_version() >= '7.0')
+@tensorrt_converter('torch.Tensor.__gt__', enabled=trt_version() >= '7.0')
 def convert_gt(ctx):
     return convert_elementwise(ctx, trt.ElementWiseOperation.GREATER)
 
-@tensorrt_converter('torch.lt')
-@tensorrt_converter('torch.Tensor.__lt__')
+@tensorrt_converter('torch.lt', enabled=trt_version() >= '7.0')
+@tensorrt_converter('torch.Tensor.__lt__', enabled=trt_version() >= '7.0')
 def convert_lt(ctx):
     return convert_elementwise(ctx, trt.ElementWiseOperation.LESS)
 
-@tensorrt_converter('torch.eq')
-@tensorrt_converter('torch.Tensor.__eq__')
+@tensorrt_converter('torch.eq', enabled=trt_version() >= '7.0')
+@tensorrt_converter('torch.Tensor.__eq__', enabled=trt_version() >= '7.0')
 def convert_eq(ctx):
     return convert_elementwise(ctx, trt.ElementWiseOperation.EQUAL)
 
@@ -46,14 +46,14 @@ def forward(self, x, y):
         return x == y
 
 
-@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 6, 6), (1, 3, 6, 6)])
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 6, 6), (1, 3, 6, 6)], enabled=trt_version() >= '7.0')
 def test_gt_basic():
     return GT()
 
-@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 6, 6), (1, 3, 6, 6)])
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 6, 6), (1, 3, 6, 6)], enabled=trt_version() >= '7.0')
 def test_lt_basic():
     return LT()
 
-@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 6, 6), (1, 3, 6, 6)])
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 6, 6), (1, 3, 6, 6)], enabled=trt_version() >= '7.0')
 def test_eq_basic():
     return EQ()
diff --git a/torch2trt/converters/trt7_ops/stack.py b/torch2trt/converters/trt7_ops/stack.py
index 0b5b4def..b86e909a 100644
--- a/torch2trt/converters/trt7_ops/stack.py
+++ b/torch2trt/converters/trt7_ops/stack.py
@@ -11,8 +11,8 @@ def unsqueeze(ctx, input, dim):
     return layer.get_output(0)
 
 
-@tensorrt_converter('torch.stack')
-def convert_stack(ctx):
+@tensorrt_converter('torch.stack', enabled=trt_version() >= '7.0')
+def convert_stack_trt7(ctx):
     inputs = get_arg(ctx, 'input', pos=0, default=None)
     dim = get_arg(ctx, 'dim', pos=1, default=0)
 
@@ -31,10 +31,10 @@ def __init__(self, dim):
 
     def forward(self, *x):
         return torch.stack(x, dim=self.dim)
 
-@add_module_test(torch.float32, torch.device('cuda'), [(1, 4, 4), (1, 4, 4), (1, 4, 4)])
-def test_Stack_basic():
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 4, 4), (1, 4, 4), (1, 4, 4)], enabled=trt_version() >= '7.0')
+def test_Stack_basic_trt7():
     return Stack(3)
 
-@add_module_test(torch.float32, torch.device('cuda'), [(1, 4, 4), (1, 4, 4), (1, 4, 4)])
-def test_Stack_basic2():
-    return Stack(1)
\ No newline at end of file
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 4, 4), (1, 4, 4), (1, 4, 4)], enabled=trt_version() >= '7.0')
+def test_Stack_basic2_trt7():
+    return Stack(1)
diff --git a/torch2trt/converters/trt7_ops/transpose.py b/torch2trt/converters/trt7_ops/transpose.py
index 2ea5f7bb..f662ae16 100644
--- a/torch2trt/converters/trt7_ops/transpose.py
+++ b/torch2trt/converters/trt7_ops/transpose.py
@@ -2,8 +2,8 @@
 from torch2trt.module_test import add_module_test
 
 
-@tensorrt_converter('torch.transpose')
-def convert_transpose(ctx):
+@tensorrt_converter('torch.transpose', enabled=trt_version() >= '7.0') +def convert_transpose_trt7(ctx): input = ctx.method_args[0] input_trt = trt_(ctx.network, input) output = ctx.method_return @@ -27,7 +27,7 @@ def forward(self, x): return torch.transpose(x, self.dim0, self.dim1).contiguous() -@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) -def test_transpose_12(): +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)], enabled=trt_version() >= '7.0') +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)], enabled=trt_version() >= '7.0') +def test_transpose_12_trt7(): return Transpose(1, 2) diff --git a/torch2trt/converters/upsample.py b/torch2trt/converters/upsample.py index 362a1d84..fa3c5fbb 100644 --- a/torch2trt/converters/upsample.py +++ b/torch2trt/converters/upsample.py @@ -2,8 +2,8 @@ from torch2trt.module_test import add_module_test import collections -@tensorrt_converter('torch.nn.functional.interpolate') -@tensorrt_converter('torch.nn.functional.upsample') +@tensorrt_converter('torch.nn.functional.interpolate', enabled=trt_version() >= '7.1') +@tensorrt_converter('torch.nn.functional.upsample', enabled=trt_version() >= '7.1') def convert_interpolate(ctx): #parse args input = get_arg(ctx, 'input', pos=0, default=None) @@ -44,55 +44,55 @@ def convert_interpolate(ctx): output._trt = layer.get_output(0) -@add_module_test(torch.float32, torch.device('cuda'), [(1,2,12,12)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1,2,12,12)], enabled=trt_version() >= '7.1') def test_nearest_mode(): return torch.nn.Upsample(scale_factor=2, mode="nearest") -@add_module_test(torch.float32, torch.device('cuda'), [(1,4,12,12)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1,4,12,12)], enabled=trt_version() >= '7.1') def test_bilinear_mode(): return torch.nn.Upsample(scale_factor=3, mode="bilinear",align_corners=False) -@add_module_test(torch.float32, torch.device('cuda'), [(1,3,12,12)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1,3,12,12)], enabled=trt_version() >= '7.1') def test_align_corner(): return torch.nn.Upsample(scale_factor=2, mode="bilinear", align_corners=True) -@add_module_test(torch.float32, torch.device('cuda'), [(1,5,13,13)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1,5,13,13)], enabled=trt_version() >= '7.1') def test_bilinear_mode_odd_input_shape(): return torch.nn.Upsample(scale_factor=2,mode="bilinear",align_corners=False) -@add_module_test(torch.float32, torch.device('cuda'), [(1,4,12,12)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1,4,12,12)], enabled=trt_version() >= '7.1') def test_size_parameter(): return torch.nn.Upsample(size=3,mode="nearest") -@add_module_test(torch.float32, torch.device('cuda'), [(1,3,13,13)]) -@add_module_test(torch.float32, torch.device('cuda'), [(1,3,1,1)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1,3,13,13)], enabled=trt_version() >= '7.1') +@add_module_test(torch.float32, torch.device('cuda'), [(1,3,1,1)], enabled=trt_version() >= '7.1') def test_size_parameter_odd_input(): return torch.nn.Upsample(size=[6,3],mode="nearest") -@add_module_test(torch.float32, torch.device('cuda'), [(1,4,6,6,6)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1,4,6,6,6)], enabled=trt_version() >= '7.1') def test_nearest_mode_3d(): return torch.nn.Upsample(scale_factor=2, mode="nearest") -@add_module_test(torch.float32, torch.device('cuda'), [(1,3,5,5,5)]) 
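Backing up to `convert_transpose_trt7` above: the permutation arithmetic can be checked in plain Python. The `- 1` offsets exist because TensorRT's implicit-batch networks describe shapes without the batch dimension. A small sketch (no TensorRT required):

```python
# Pure-Python rendering of the permutation built by convert_transpose_trt7.
def trt_transpose_permutation(input_ndim, dim0, dim1):
    permutation = list(range(input_ndim - 1))  # tensor shape minus the batch dim
    permutation[dim0 - 1], permutation[dim1 - 1] = dim1 - 1, dim0 - 1
    return permutation

# Transpose(1, 2) on an NCHW tensor swaps the first two non-batch axes:
assert trt_transpose_permutation(4, 1, 2) == [1, 0, 2]
```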
+@add_module_test(torch.float32, torch.device('cuda'), [(1,3,5,5,5)], enabled=trt_version() >= '7.1') def test_bilinear_mode_3d(): return torch.nn.Upsample(scale_factor=3, mode="trilinear",align_corners=False) -@add_module_test(torch.float32, torch.device('cuda'), [(1,4,8,8,8)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1,4,8,8,8)], enabled=trt_version() >= '7.1') def test_align_corner_3d(): return torch.nn.Upsample(scale_factor=4, mode="trilinear", align_corners=True) -@add_module_test(torch.float32, torch.device('cuda'), [(1,6,7,7,7)]) -@add_module_test(torch.float32, torch.device('cuda'), [(1,3,2,4,4)]) -@add_module_test(torch.float32, torch.device('cuda'), [(1,3,1,1,1)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1,6,7,7,7)], enabled=trt_version() >= '7.1') +@add_module_test(torch.float32, torch.device('cuda'), [(1,3,2,4,4)], enabled=trt_version() >= '7.1') +@add_module_test(torch.float32, torch.device('cuda'), [(1,3,1,1,1)], enabled=trt_version() >= '7.1') def test_bilinear_mode_odd_input_shape_3d(): return torch.nn.Upsample(scale_factor=2, mode="trilinear",align_corners=False) -@add_module_test(torch.float32, torch.device('cuda'), [(1,1,12,12,12)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1,1,12,12,12)], enabled=trt_version() >= '7.1') def test_size_parameter_3d(): return torch.nn.Upsample(size=3,mode="trilinear", align_corners=True) -@add_module_test(torch.float32, torch.device('cuda'), [(1,3,7,9,5)]) -@add_module_test(torch.float32, torch.device('cuda'), [(1,4,3,5,1)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1,3,7,9,5)], enabled=trt_version() >= '7.1') +@add_module_test(torch.float32, torch.device('cuda'), [(1,4,3,5,1)], enabled=trt_version() >= '7.1') def test_size_parameter_odd_input_3d(): - return torch.nn.Upsample(size=[11,14,17],mode="trilinear", align_corners=False) \ No newline at end of file + return torch.nn.Upsample(size=[11,14,17],mode="trilinear", align_corners=False) diff --git a/torch2trt/module_test.py b/torch2trt/module_test.py index b590bd95..fb158fe8 100644 --- a/torch2trt/module_test.py +++ b/torch2trt/module_test.py @@ -18,9 +18,18 @@ def module_name(self): ] -def add_module_test(dtype, device, input_shapes, **torch2trt_kwargs): +def add_module_test(dtype, device, input_shapes, enabled=True, **torch2trt_kwargs): def register_module_test(module): global MODULE_TESTS MODULE_TESTS += [ModuleTest(module, dtype, device, input_shapes, **torch2trt_kwargs)] return module - return register_module_test \ No newline at end of file + + def pass_module_test(module): + return module + + if enabled: + return register_module_test + else: + return pass_module_test + + return register_module_test diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index 479c1471..8ba79cb5 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -2,21 +2,24 @@ import tensorrt as trt from copy import copy import numpy as np -from torch2trt.utils import get_trt_version + from .calibration import ( TensorBatchDataset, DatasetCalibrator, DEFAULT_CALIBRATION_ALGORITHM, ) -trt_version = get_trt_version() # UTILITY FUNCTIONS +def trt_version(): + return trt.__version__ + + def torch_dtype_to_trt(dtype): - if trt_version >= 7.0 and dtype == torch.bool: + if trt_version() >= '7.0' and dtype == torch.bool: return trt.bool - if dtype == torch.int8: + elif dtype == torch.int8: return trt.int8 elif dtype == torch.int32: return trt.int32 @@ -31,7 +34,7 @@ def torch_dtype_to_trt(dtype): def torch_dtype_from_trt(dtype): if dtype == 
trt.int8: return torch.int8 - if trt_version >= 7.0 and dtype == trt.bool: + elif trt_version() >= '7.0' and dtype == trt.bool: return torch.bool elif dtype == trt.int32: return torch.int32 @@ -438,9 +441,18 @@ def torch2trt(module, # DEFINE ALL CONVERSION FUNCTIONS -def tensorrt_converter(method, is_real=True): +def tensorrt_converter(method, is_real=True, enabled=True): + def register_converter(converter): CONVERTERS[method] = {"converter": converter, "is_real": is_real} return converter + def pass_converter(converter): + return converter + + if enabled: + return register_converter + else: + return pass_converter + return register_converter diff --git a/torch2trt/utils.py b/torch2trt/utils.py index 95fcd745..f5493e00 100644 --- a/torch2trt/utils.py +++ b/torch2trt/utils.py @@ -61,7 +61,3 @@ def trt_network_to_dot_graph(network): dot.edge(layer_b.name, input_i.name, label=str(input_j.shape)) return dot - - -def get_trt_version(): - return float(trt.__version__[:3]) From 5c3189253a591dfdcc44f99867ee8506a3c3d7d8 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 9 Jun 2020 14:59:19 -0700 Subject: [PATCH 280/355] moved trt7 ops to converters dir --- torch2trt/converters/{trt7_ops => }/Conv.py | 0 .../{trt7_ops => }/ConvTranspose.py | 0 torch2trt/converters/__init__.py | 14 ++--- .../converters/{trt7_ops => }/avg_pool.py | 61 ++++++++++++++++--- torch2trt/converters/avg_pool2d.py | 57 ----------------- .../converters/{trt7_ops => }/batch_norm.py | 0 .../converters/{trt7_ops => }/compare.py | 0 torch2trt/converters/{trt7_ops => }/stack.py | 0 torch2trt/converters/transpose.py | 21 ++++++- torch2trt/converters/trt7_ops/__init__.py | 0 torch2trt/converters/trt7_ops/transpose.py | 33 ---------- 11 files changed, 77 insertions(+), 109 deletions(-) rename torch2trt/converters/{trt7_ops => }/Conv.py (100%) rename torch2trt/converters/{trt7_ops => }/ConvTranspose.py (100%) rename torch2trt/converters/{trt7_ops => }/avg_pool.py (56%) delete mode 100644 torch2trt/converters/avg_pool2d.py rename torch2trt/converters/{trt7_ops => }/batch_norm.py (100%) rename torch2trt/converters/{trt7_ops => }/compare.py (100%) rename torch2trt/converters/{trt7_ops => }/stack.py (100%) delete mode 100644 torch2trt/converters/trt7_ops/__init__.py delete mode 100644 torch2trt/converters/trt7_ops/transpose.py diff --git a/torch2trt/converters/trt7_ops/Conv.py b/torch2trt/converters/Conv.py similarity index 100% rename from torch2trt/converters/trt7_ops/Conv.py rename to torch2trt/converters/Conv.py diff --git a/torch2trt/converters/trt7_ops/ConvTranspose.py b/torch2trt/converters/ConvTranspose.py similarity index 100% rename from torch2trt/converters/trt7_ops/ConvTranspose.py rename to torch2trt/converters/ConvTranspose.py diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index 1ca1a416..541bd08c 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -46,20 +46,18 @@ from .unary import * # trt < 7.0 -from .avg_pool2d import * +from .avg_pool import * from .BatchNorm2d import * from .Conv2d import * from .ConvTranspose2d import * from .transpose import * # trt >= 7.0 -from .trt7_ops.avg_pool import * -from .trt7_ops.compare import * -from .trt7_ops.batch_norm import * -from .trt7_ops.Conv import * -from .trt7_ops.ConvTranspose import * -from .trt7_ops.stack import * -from .trt7_ops.transpose import * +from .compare import * +from .batch_norm import * +from .Conv import * +from .ConvTranspose import * +from .stack import * # trt >= 7.1 from .upsample import * 
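A note on the `trt_version()` checks that now gate converters and tests: they compare version *strings*, which orders correctly while both major versions are single digits but inverts once a major version reaches two digits. A tiny demonstration, with a forward-compatible alternative (the `packaging` variant is an editor's suggestion, not part of the patch):

```python
# Lexicographic string comparison, as used by the enabled= guards above:
assert '7.1' >= '7.0'
assert not ('10.0' >= '7.0')  # a hypothetical TRT 10 would be gated off

# Parsing the version instead avoids the pitfall:
from packaging import version
assert version.parse('10.0') >= version.parse('7.0')
```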
diff --git a/torch2trt/converters/trt7_ops/avg_pool.py b/torch2trt/converters/avg_pool.py similarity index 56% rename from torch2trt/converters/trt7_ops/avg_pool.py rename to torch2trt/converters/avg_pool.py index 6219aaad..ec22f76d 100644 --- a/torch2trt/converters/trt7_ops/avg_pool.py +++ b/torch2trt/converters/avg_pool.py @@ -2,6 +2,47 @@ from torch2trt.module_test import add_module_test +@tensorrt_converter("torch.nn.functional.avg_pool2d", enabled=trt_version() < '7.0') +def convert_avg_pool2d(ctx): + # parse args + input = get_arg(ctx, "input", pos=0, default=None) + kernel_size = get_arg(ctx, "kernel_size", pos=1, default=None) + stride = get_arg(ctx, "stride", pos=2, default=None) + padding = get_arg(ctx, "padding", pos=3, default=0) + ceil_mode = get_arg(ctx, "ceil_mode", pos=4, default=False) + count_include_pad = get_arg(ctx, "count_include_pad", pos=5, default=True) + + # get input trt tensor (or create constant if it doesn't exist) + input_trt = trt_(ctx.network, input) + + output = ctx.method_return + + # get kernel size + if not isinstance(kernel_size, tuple): + kernel_size = (kernel_size,) * 2 + + # get stride + if not isinstance(stride, tuple): + stride = (stride,) * 2 + + # get padding + if not isinstance(padding, tuple): + padding = (padding,) * 2 + + layer = ctx.network.add_pooling( + input=input_trt, type=trt.PoolingType.AVERAGE, window_size=kernel_size + ) + + layer.stride = stride + layer.padding = padding + layer.average_count_excludes_padding = not count_include_pad + + if ceil_mode: + layer.padding_mode = trt.PaddingMode.EXPLICIT_ROUND_UP + + output._trt = layer.get_output(0) + + @tensorrt_converter('torch.nn.functional.avg_pool2d', enabled=trt_version() >= '7.0') @tensorrt_converter('torch.nn.functional.avg_pool3d', enabled=trt_version() >= '7.0') def convert_avg_pool_trt7(ctx): @@ -42,18 +83,20 @@ def convert_avg_pool_trt7(ctx): layer.padding_mode = trt.PaddingMode.EXPLICIT_ROUND_UP output._trt = layer.get_output(0) - - -@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 6)], enabled=trt_version() >= '7.0') -@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 5, 7)], enabled=trt_version() >= '7.0') -def test_avg_pool2d_without_ceil_mode_trt7(): + + +@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 4, 6)]) +@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 5, 7)]) +def test_avg_pool2d_without_ceil_mode(): return torch.nn.AvgPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=False) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 6)], enabled=trt_version() >= '7.0') -@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 5, 7)], enabled=trt_version() >= '7.0') -def test_avg_pool2d_with_ceil_mode_trt7(): - return torch.nn.AvgPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True, count_include_pad=False) # TRT does not support ceil_mode=True && count_include_pad=True +@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 4, 6)]) +@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 5, 7)]) +def test_avg_pool2d_with_ceil_mode(): + return torch.nn.AvgPool2d( + kernel_size=3, stride=2, padding=1, ceil_mode=True, count_include_pad=False + ) # TRT does not support ceil_mode=True && count_include_pad=True @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 4, 6)], enabled=trt_version() >= '7.0') diff --git a/torch2trt/converters/avg_pool2d.py b/torch2trt/converters/avg_pool2d.py deleted file mode 100644 index 2b359412..00000000 --- 
a/torch2trt/converters/avg_pool2d.py +++ /dev/null @@ -1,57 +0,0 @@ -from torch2trt.torch2trt import * -from torch2trt.module_test import add_module_test - - -@tensorrt_converter("torch.nn.functional.avg_pool2d", enabled=trt_version() < '7.0') -def convert_avg_pool2d(ctx): - # parse args - input = get_arg(ctx, "input", pos=0, default=None) - kernel_size = get_arg(ctx, "kernel_size", pos=1, default=None) - stride = get_arg(ctx, "stride", pos=2, default=None) - padding = get_arg(ctx, "padding", pos=3, default=0) - ceil_mode = get_arg(ctx, "ceil_mode", pos=4, default=False) - count_include_pad = get_arg(ctx, "count_include_pad", pos=5, default=True) - - # get input trt tensor (or create constant if it doesn't exist) - input_trt = trt_(ctx.network, input) - - output = ctx.method_return - - # get kernel size - if not isinstance(kernel_size, tuple): - kernel_size = (kernel_size,) * 2 - - # get stride - if not isinstance(stride, tuple): - stride = (stride,) * 2 - - # get padding - if not isinstance(padding, tuple): - padding = (padding,) * 2 - - layer = ctx.network.add_pooling( - input=input_trt, type=trt.PoolingType.AVERAGE, window_size=kernel_size - ) - - layer.stride = stride - layer.padding = padding - layer.average_count_excludes_padding = not count_include_pad - - if ceil_mode: - layer.padding_mode = trt.PaddingMode.EXPLICIT_ROUND_UP - - output._trt = layer.get_output(0) - - -@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 4, 6)], enabled=trt_version() < '7.0') -@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 5, 7)], enabled=trt_version() < '7.0') -def test_avg_pool2d_without_ceil_mode(): - return torch.nn.AvgPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=False) - - -@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 4, 6)], enabled=trt_version() < '7.0') -@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 5, 7)], enabled=trt_version() < '7.0') -def test_avg_pool2d_with_ceil_mode(): - return torch.nn.AvgPool2d( - kernel_size=3, stride=2, padding=1, ceil_mode=True, count_include_pad=False - ) # TRT does not support ceil_mode=True && count_include_pad=True diff --git a/torch2trt/converters/trt7_ops/batch_norm.py b/torch2trt/converters/batch_norm.py similarity index 100% rename from torch2trt/converters/trt7_ops/batch_norm.py rename to torch2trt/converters/batch_norm.py diff --git a/torch2trt/converters/trt7_ops/compare.py b/torch2trt/converters/compare.py similarity index 100% rename from torch2trt/converters/trt7_ops/compare.py rename to torch2trt/converters/compare.py diff --git a/torch2trt/converters/trt7_ops/stack.py b/torch2trt/converters/stack.py similarity index 100% rename from torch2trt/converters/trt7_ops/stack.py rename to torch2trt/converters/stack.py diff --git a/torch2trt/converters/transpose.py b/torch2trt/converters/transpose.py index 4df56686..4f0f28f4 100644 --- a/torch2trt/converters/transpose.py +++ b/torch2trt/converters/transpose.py @@ -18,6 +18,23 @@ def convert_transpose(ctx): output._trt = layer.get_output(0) +@tensorrt_converter('torch.transpose', enabled=trt_version() >= '7.0') +def convert_transpose_trt7(ctx): + input = ctx.method_args[0] + input_trt = trt_(ctx.network, input) + output = ctx.method_return + # permutation -1 because TRT does not include batch dim + permutation = list(range(len(input.shape) - 1)) + dim0 = ctx.method_args[1] - 1 + dim1 = ctx.method_args[2] - 1 + permutation[dim0] = dim1 + permutation[dim1] = dim0 + layer = ctx.network.add_shuffle(input_trt) + layer.second_transpose = 
tuple(permutation) + output._trt = layer.get_output(0) + + + class Transpose(torch.nn.Module): def __init__(self, dim0, dim1): super(Transpose, self).__init__() @@ -28,7 +45,7 @@ def forward(self, x): return torch.transpose(x, self.dim0, self.dim1).contiguous() -@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 3)], enabled=trt_version() < '7.0') -@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 3, 3)], enabled=trt_version() < '7.0') +@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 3)]) +@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 3, 3)]) def test_transpose_12(): return Transpose(1, 2) diff --git a/torch2trt/converters/trt7_ops/__init__.py b/torch2trt/converters/trt7_ops/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/torch2trt/converters/trt7_ops/transpose.py b/torch2trt/converters/trt7_ops/transpose.py deleted file mode 100644 index f662ae16..00000000 --- a/torch2trt/converters/trt7_ops/transpose.py +++ /dev/null @@ -1,33 +0,0 @@ -from torch2trt.torch2trt import * -from torch2trt.module_test import add_module_test - - -@tensorrt_converter('torch.transpose', enabled=trt_version() >= '7.0') -def convert_transpose_trt7(ctx): - input = ctx.method_args[0] - input_trt = trt_(ctx.network, input) - output = ctx.method_return - # permutation -1 because TRT does not include batch dim - permutation = list(range(len(input.shape) - 1)) - dim0 = ctx.method_args[1] - 1 - dim1 = ctx.method_args[2] - 1 - permutation[dim0] = dim1 - permutation[dim1] = dim0 - layer = ctx.network.add_shuffle(input_trt) - layer.second_transpose = tuple(permutation) - output._trt = layer.get_output(0) - - -class Transpose(torch.nn.Module): - def __init__(self, dim0, dim1): - super(Transpose, self).__init__() - self.dim0 = dim0 - self.dim1 = dim1 - def forward(self, x): - return torch.transpose(x, self.dim0, self.dim1).contiguous() - - -@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)], enabled=trt_version() >= '7.0') -@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)], enabled=trt_version() >= '7.0') -def test_transpose_12_trt7(): - return Transpose(1, 2) From e3f45d33127c08502619ec2ab56121b7d41c5afe Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 9 Jun 2020 15:10:39 -0700 Subject: [PATCH 281/355] merged interpolate modules --- torch2trt/converters/__init__.py | 7 +- torch2trt/converters/interpolate.py | 108 ++++++++++++++++++++++++++-- torch2trt/converters/upsample.py | 98 ------------------------- 3 files changed, 103 insertions(+), 110 deletions(-) delete mode 100644 torch2trt/converters/upsample.py diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index 541bd08c..b3527724 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -60,9 +60,4 @@ from .stack import * # trt >= 7.1 -from .upsample import * - -try: - from .interpolate import * -except: - pass +from .interpolate import * diff --git a/torch2trt/converters/interpolate.py b/torch2trt/converters/interpolate.py index c1dff1ef..bef2f37b 100644 --- a/torch2trt/converters/interpolate.py +++ b/torch2trt/converters/interpolate.py @@ -1,12 +1,12 @@ -import tensorrt as trt import torch.nn.functional as F -from torch2trt.torch2trt import * +import torch.nn as nn +from torch2trt.torch2trt import * from torch2trt.module_test import add_module_test -from torch2trt.plugins import InterpolatePlugin -# from .interpolate_pb2 import interpolate_Message -import torch.nn as nn +import collections + def 
get_interpolate_plugin(size, mode, align_corners): + from torch2trt.plugins import InterpolatePlugin PLUGIN_NAME = 'interpolate' registry = trt.get_plugin_registry() creator = [c for c in registry.plugin_creator_list if c.name == PLUGIN_NAME and c.plugin_namespace == 'torch2trt'][0] @@ -15,7 +15,7 @@ def get_interpolate_plugin(size, mode, align_corners): @tensorrt_converter('torch.nn.functional.interpolate', enabled=trt_version() < '7.1') -def convert_interpolate(ctx): +def convert_interpolate_plugin(ctx): input = ctx.method_args[0] input_trt = trt_(ctx.network, input) output = ctx.method_return @@ -40,6 +40,49 @@ def convert_interpolate(ctx): output._trt = layer.get_output(0) + +@tensorrt_converter('torch.nn.functional.interpolate', enabled=trt_version() >= '7.1') +@tensorrt_converter('torch.nn.functional.upsample', enabled=trt_version() >= '7.1') +def convert_interpolate_trt7(ctx): + #parse args + input = get_arg(ctx, 'input', pos=0, default=None) + size = get_arg(ctx, 'size', pos=1, default=None) + scale_factor=get_arg(ctx, 'scale_factor', pos=2, default=None) + mode = get_arg(ctx, 'mode', pos=3, default='nearest') + align_corners = get_arg(ctx, 'align_corners', pos=4, default=None) + + input_dim = input.dim() - 2 + + input_trt = trt_(ctx.network, input) + output = ctx.method_return + layer = ctx.network.add_resize(input=input_trt) + + shape = size + if shape != None: + if isinstance(shape, collections.Sequence): + shape = [input.size(1)] + list(shape) + else: + shape = [input.size(1)] + [shape] * input_dim + + layer.shape = shape + + scales = scale_factor + if scales != None: + if not isinstance(scales, collections.Sequence): + scales = [scales] * input_dim + layer.scales = [1] + list(scales) + + resize_mode = mode + if resize_mode.lower() in ["linear","bilinear","trilinear"]: + layer.resize_mode = trt.ResizeMode.LINEAR + else: + layer.resize_mode=trt.ResizeMode.NEAREST + + if align_corners != None: + layer.align_corners = align_corners + + output._trt = layer.get_output(0) + class Interpolate(torch.nn.Module): def __init__(self, size, mode, align_corners): @@ -74,3 +117,56 @@ def test_interpolate_area(): @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1') def test_upsample_scale_factor2(): return nn.Upsample(scale_factor=2, mode='bilinear',align_corners=False) + +@add_module_test(torch.float32, torch.device('cuda'), [(1,2,12,12)], enabled=trt_version() >= '7.1') +def test_nearest_mode(): + return torch.nn.Upsample(scale_factor=2, mode="nearest") + +@add_module_test(torch.float32, torch.device('cuda'), [(1,4,12,12)], enabled=trt_version() >= '7.1') +def test_bilinear_mode(): + return torch.nn.Upsample(scale_factor=3, mode="bilinear",align_corners=False) + +@add_module_test(torch.float32, torch.device('cuda'), [(1,3,12,12)], enabled=trt_version() >= '7.1') +def test_align_corner(): + return torch.nn.Upsample(scale_factor=2, mode="bilinear", align_corners=True) + +@add_module_test(torch.float32, torch.device('cuda'), [(1,5,13,13)], enabled=trt_version() >= '7.1') +def test_bilinear_mode_odd_input_shape(): + return torch.nn.Upsample(scale_factor=2,mode="bilinear",align_corners=False) + +@add_module_test(torch.float32, torch.device('cuda'), [(1,4,12,12)], enabled=trt_version() >= '7.1') +def test_size_parameter(): + return torch.nn.Upsample(size=3,mode="nearest") + +@add_module_test(torch.float32, torch.device('cuda'), [(1,3,13,13)], enabled=trt_version() >= '7.1') +@add_module_test(torch.float32, torch.device('cuda'), [(1,3,1,1)], 
enabled=trt_version() >= '7.1') +def test_size_parameter_odd_input(): + return torch.nn.Upsample(size=[6,3],mode="nearest") + + +@add_module_test(torch.float32, torch.device('cuda'), [(1,4,6,6,6)], enabled=trt_version() >= '7.1') +def test_nearest_mode_3d(): + return torch.nn.Upsample(scale_factor=2, mode="nearest") + +@add_module_test(torch.float32, torch.device('cuda'), [(1,3,5,5,5)], enabled=trt_version() >= '7.1') +def test_bilinear_mode_3d(): + return torch.nn.Upsample(scale_factor=3, mode="trilinear",align_corners=False) + +@add_module_test(torch.float32, torch.device('cuda'), [(1,4,8,8,8)], enabled=trt_version() >= '7.1') +def test_align_corner_3d(): + return torch.nn.Upsample(scale_factor=4, mode="trilinear", align_corners=True) + +@add_module_test(torch.float32, torch.device('cuda'), [(1,6,7,7,7)], enabled=trt_version() >= '7.1') +@add_module_test(torch.float32, torch.device('cuda'), [(1,3,2,4,4)], enabled=trt_version() >= '7.1') +@add_module_test(torch.float32, torch.device('cuda'), [(1,3,1,1,1)], enabled=trt_version() >= '7.1') +def test_bilinear_mode_odd_input_shape_3d(): + return torch.nn.Upsample(scale_factor=2, mode="trilinear",align_corners=False) + +@add_module_test(torch.float32, torch.device('cuda'), [(1,1,12,12,12)], enabled=trt_version() >= '7.1') +def test_size_parameter_3d(): + return torch.nn.Upsample(size=3,mode="trilinear", align_corners=True) + +@add_module_test(torch.float32, torch.device('cuda'), [(1,3,7,9,5)], enabled=trt_version() >= '7.1') +@add_module_test(torch.float32, torch.device('cuda'), [(1,4,3,5,1)], enabled=trt_version() >= '7.1') +def test_size_parameter_odd_input_3d(): + return torch.nn.Upsample(size=[11,14,17],mode="trilinear", align_corners=False) diff --git a/torch2trt/converters/upsample.py b/torch2trt/converters/upsample.py deleted file mode 100644 index fa3c5fbb..00000000 --- a/torch2trt/converters/upsample.py +++ /dev/null @@ -1,98 +0,0 @@ -from torch2trt.torch2trt import * -from torch2trt.module_test import add_module_test -import collections - -@tensorrt_converter('torch.nn.functional.interpolate', enabled=trt_version() >= '7.1') -@tensorrt_converter('torch.nn.functional.upsample', enabled=trt_version() >= '7.1') -def convert_interpolate(ctx): - #parse args - input = get_arg(ctx, 'input', pos=0, default=None) - size = get_arg(ctx, 'size', pos=1, default=None) - scale_factor=get_arg(ctx, 'scale_factor', pos=2, default=None) - mode = get_arg(ctx, 'mode', pos=3, default='nearest') - align_corners = get_arg(ctx, 'align_corners', pos=4, default=None) - - input_dim = input.dim() - 2 - - input_trt = trt_(ctx.network, input) - output = ctx.method_return - layer = ctx.network.add_resize(input=input_trt) - - shape = size - if shape != None: - if isinstance(shape, collections.Sequence): - shape = [input.size(1)] + list(shape) - else: - shape = [input.size(1)] + [shape] * input_dim - - layer.shape = shape - - scales = scale_factor - if scales != None: - if not isinstance(scales, collections.Sequence): - scales = [scales] * input_dim - layer.scales = [1] + list(scales) - - resize_mode = mode - if resize_mode.lower() in ["linear","bilinear","trilinear"]: - layer.resize_mode = trt.ResizeMode.LINEAR - else: - layer.resize_mode=trt.ResizeMode.NEAREST - - if align_corners != None: - layer.align_corners = align_corners - - output._trt = layer.get_output(0) - -@add_module_test(torch.float32, torch.device('cuda'), [(1,2,12,12)], enabled=trt_version() >= '7.1') -def test_nearest_mode(): - return torch.nn.Upsample(scale_factor=2, mode="nearest") - 
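The `convert_interpolate_trt7` logic just merged above (and the `upsample.py` copy being deleted here) normalizes `size`/`scale_factor` so the channel axis is preserved and the batch axis is excluded before configuring the resize layer. A pure-Python sketch of that mapping; note that the `collections.Sequence` spelling the patch relies on was removed in Python 3.10, so the sketch uses `collections.abc.Sequence`:

```python
import collections.abc

def normalize_resize_args(input_shape, size=None, scale_factor=None):
    input_dim = len(input_shape) - 2  # spatial dims only
    shape, scales = None, None
    if size is not None:
        if isinstance(size, collections.abc.Sequence):
            shape = [input_shape[1]] + list(size)
        else:
            shape = [input_shape[1]] + [size] * input_dim
    if scale_factor is not None:
        if not isinstance(scale_factor, collections.abc.Sequence):
            scale_factor = [scale_factor] * input_dim
        scales = [1] + list(scale_factor)  # the channel axis is never rescaled
    return shape, scales

assert normalize_resize_args((1, 3, 12, 12), size=3) == ([3, 3, 3], None)
assert normalize_resize_args((1, 3, 12, 12), scale_factor=2) == (None, [1, 2, 2])
```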
-@add_module_test(torch.float32, torch.device('cuda'), [(1,4,12,12)], enabled=trt_version() >= '7.1') -def test_bilinear_mode(): - return torch.nn.Upsample(scale_factor=3, mode="bilinear",align_corners=False) - -@add_module_test(torch.float32, torch.device('cuda'), [(1,3,12,12)], enabled=trt_version() >= '7.1') -def test_align_corner(): - return torch.nn.Upsample(scale_factor=2, mode="bilinear", align_corners=True) - -@add_module_test(torch.float32, torch.device('cuda'), [(1,5,13,13)], enabled=trt_version() >= '7.1') -def test_bilinear_mode_odd_input_shape(): - return torch.nn.Upsample(scale_factor=2,mode="bilinear",align_corners=False) - -@add_module_test(torch.float32, torch.device('cuda'), [(1,4,12,12)], enabled=trt_version() >= '7.1') -def test_size_parameter(): - return torch.nn.Upsample(size=3,mode="nearest") - -@add_module_test(torch.float32, torch.device('cuda'), [(1,3,13,13)], enabled=trt_version() >= '7.1') -@add_module_test(torch.float32, torch.device('cuda'), [(1,3,1,1)], enabled=trt_version() >= '7.1') -def test_size_parameter_odd_input(): - return torch.nn.Upsample(size=[6,3],mode="nearest") - - -@add_module_test(torch.float32, torch.device('cuda'), [(1,4,6,6,6)], enabled=trt_version() >= '7.1') -def test_nearest_mode_3d(): - return torch.nn.Upsample(scale_factor=2, mode="nearest") - -@add_module_test(torch.float32, torch.device('cuda'), [(1,3,5,5,5)], enabled=trt_version() >= '7.1') -def test_bilinear_mode_3d(): - return torch.nn.Upsample(scale_factor=3, mode="trilinear",align_corners=False) - -@add_module_test(torch.float32, torch.device('cuda'), [(1,4,8,8,8)], enabled=trt_version() >= '7.1') -def test_align_corner_3d(): - return torch.nn.Upsample(scale_factor=4, mode="trilinear", align_corners=True) - -@add_module_test(torch.float32, torch.device('cuda'), [(1,6,7,7,7)], enabled=trt_version() >= '7.1') -@add_module_test(torch.float32, torch.device('cuda'), [(1,3,2,4,4)], enabled=trt_version() >= '7.1') -@add_module_test(torch.float32, torch.device('cuda'), [(1,3,1,1,1)], enabled=trt_version() >= '7.1') -def test_bilinear_mode_odd_input_shape_3d(): - return torch.nn.Upsample(scale_factor=2, mode="trilinear",align_corners=False) - -@add_module_test(torch.float32, torch.device('cuda'), [(1,1,12,12,12)], enabled=trt_version() >= '7.1') -def test_size_parameter_3d(): - return torch.nn.Upsample(size=3,mode="trilinear", align_corners=True) - -@add_module_test(torch.float32, torch.device('cuda'), [(1,3,7,9,5)], enabled=trt_version() >= '7.1') -@add_module_test(torch.float32, torch.device('cuda'), [(1,4,3,5,1)], enabled=trt_version() >= '7.1') -def test_size_parameter_odd_input_3d(): - return torch.nn.Upsample(size=[11,14,17],mode="trilinear", align_corners=False) From 1f4f69c113f18de00ab95154e5ec1737bde8ae6b Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 9 Jun 2020 15:12:29 -0700 Subject: [PATCH 282/355] format init --- torch2trt/converters/__init__.py | 60 ++++++++++++++------------------ 1 file changed, 27 insertions(+), 33 deletions(-) diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index b3527724..2bb0c828 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -4,26 +4,39 @@ # supported converters will override dummy converters +from .AdaptiveAvgPool2d import * +from .BatchNorm1d import * +from .BatchNorm2d import * +from .Conv import * +from .Conv1d import * +from .Conv2d import * +from .ConvTranspose import * +from .ConvTranspose2d import * +from .Identity import * +from .Linear import * +from .LogSoftmax 
import * +from .ReLU import * +from .ReLU6 import * from .activation import * from .adaptive_avg_pool2d import * from .adaptive_max_pool2d import * -from .AdaptiveAvgPool2d import * from .add import * -from .mul import * -from .div import * -from .BatchNorm1d import * +from .avg_pool import * +from .batch_norm import * from .cat import * +from .chunk import * from .clamp import * -from .Conv1d import * +from .compare import * +from .div import * from .getitem import * from .identity import * -from .Identity import * from .instance_norm import * -from .Linear import * -from .LogSoftmax import * -from .max_pool2d import * +from .interpolate import * from .max import * +from .max_pool2d import * +from .mean import * from .min import * +from .mul import * from .normalize import * from .pad import * from .permute import * @@ -31,33 +44,14 @@ from .prelu import * from .prod import * from .relu import * -from .ReLU import * from .relu6 import * -from .ReLU6 import * from .sigmoid import * +from .softmax import * +from .split import * +from .stack import * from .sub import * from .sum import * -from .view import * from .tanh import * -from .mean import * -from .softmax import * -from .split import * -from .chunk import * -from .unary import * - -# trt < 7.0 -from .avg_pool import * -from .BatchNorm2d import * -from .Conv2d import * -from .ConvTranspose2d import * from .transpose import * - -# trt >= 7.0 -from .compare import * -from .batch_norm import * -from .Conv import * -from .ConvTranspose import * -from .stack import * - -# trt >= 7.1 -from .interpolate import * +from .unary import * +from .view import * From 229f80703e2330090d120d7f0ef053abf1fa73d2 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 9 Jun 2020 15:13:01 -0700 Subject: [PATCH 283/355] increment minor vers --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 05657ae5..a5149a22 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ def trt_lib_dir(): setup( name='torch2trt', - version='0.0.3', + version='0.1.0', description='An easy to use PyTorch to TensorRT converter', packages=find_packages(), ext_package='torch2trt', From 57c81887a6878b9a73b5247d38d52af885571cec Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 9 Jun 2020 18:35:03 -0700 Subject: [PATCH 284/355] added --plugins to setup --- setup.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index a5149a22..75fa4237 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,4 @@ +import sys import torch from setuptools import setup, find_packages from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CppExtension @@ -8,8 +9,9 @@ def trt_inc_dir(): def trt_lib_dir(): return "/usr/lib/aarch64-linux-gnu" -ext_modules = [ - CUDAExtension( +ext_modules = [] + +plugins_ext_module = CUDAExtension( name='plugins', sources=[ 'torch2trt/plugins/interpolate.cpp' @@ -27,9 +29,12 @@ def trt_lib_dir(): 'cxx': ['-DUSE_DEPRECATED_INTLIST'] if torch.__version__ < "1.5" else [], 'nvcc': [] } - ) -] +) +if '--plugins' in sys.argv: + ext_modules.append(plugins_ext_module) + sys.argv.remove('--plugins') + setup( name='torch2trt', From a22e82c9d41eb068e4bcd3bb4fbf1d77e5be8971 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 9 Jun 2020 18:50:51 -0700 Subject: [PATCH 285/355] plugins disclaimer to readme --- README.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/README.md b/README.md index 7a50588d..6e41848f 100644 --- a/README.md +++ b/README.md @@ -113,7 
+113,7 @@ sudo python setup.py install To install with plugins to support some operations in PyTorch that are not natviely supported with TensorRT, call the following -> This currently only includes a plugin for ``torch.nn.functional.interpolate`` +> Please note, this currently only includes the interpolate plugin. This plugin requires PyTorch 1.3+ for serialization. ```bash sudo apt-get install libprotobuf* protobuf-compiler ninja-build @@ -122,9 +122,6 @@ cd torch2trt sudo python setup.py install --plugins ``` -> torch2trt is tested against a system configured with the [JetCard](http://github.com/NVIDIA-AI-IOT/jetcard) setup. Different system configurations may require additional steps. - - ## How does it work? This converter works by attaching conversion functions (like ``convert_ReLU``) to the original From 1a5a39703483dc135170b0f8e91e3e935db4b1a3 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 9 Jun 2020 18:57:28 -0700 Subject: [PATCH 286/355] disable interpolate if PyTorch < 1.3 --- torch2trt/converters/interpolate.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/torch2trt/converters/interpolate.py b/torch2trt/converters/interpolate.py index bef2f37b..4b85a332 100644 --- a/torch2trt/converters/interpolate.py +++ b/torch2trt/converters/interpolate.py @@ -14,7 +14,7 @@ def get_interpolate_plugin(size, mode, align_corners): return creator.deserialize_plugin(PLUGIN_NAME, torch2trt_plugin.serializeToString()) -@tensorrt_converter('torch.nn.functional.interpolate', enabled=trt_version() < '7.1') +@tensorrt_converter('torch.nn.functional.interpolate', enabled=trt_version() < '7.1' and torch.__version__ >= '1.3') def convert_interpolate_plugin(ctx): input = ctx.method_args[0] input_trt = trt_(ctx.network, input) @@ -95,26 +95,26 @@ def forward(self, x): return F.interpolate(x, self.size, mode=self.mode, align_corners=self.align_corners) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1') +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1' and torch.__version__ >= '1.3') def test_interpolate_nearest(): return Interpolate((224, 224), 'nearest', None) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1') +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1' and torch.__version__ >= '1.3') def test_interpolate_bilinear(): return Interpolate((224, 224), 'bilinear', False) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1') +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1' and torch.__version__ >= '1.3') def test_interpolate_bicubic(): return Interpolate((224, 224), 'bicubic', False) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1') +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1' and torch.__version__ >= '1.3') def test_interpolate_area(): return Interpolate((56, 56), 'area', None) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1') +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1' and torch.__version__ >= '1.3') def test_upsample_scale_factor2(): return nn.Upsample(scale_factor=2, mode='bilinear',align_corners=False) 
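With the version guards above in place, a quick end-to-end smoke test of interpolate support follows the same pattern as the README usage example (this assumes a CUDA device and a working TensorRT install):

```python
import torch
import torch.nn as nn
from torch2trt import torch2trt

model = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False).cuda().eval()
x = torch.randn(1, 3, 112, 112).cuda()

model_trt = torch2trt(model, [x])
print(torch.max(torch.abs(model(x) - model_trt(x))))  # expect a small max error
```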
From b31611cd83179b4c44a2922488e0090f3e4f4fc6 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Wed, 10 Jun 2020 02:28:31 +0000 Subject: [PATCH 287/355] fixed interpolate plugin checking --- torch2trt/converters/interpolate.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/torch2trt/converters/interpolate.py b/torch2trt/converters/interpolate.py index 4b85a332..96a63d7d 100644 --- a/torch2trt/converters/interpolate.py +++ b/torch2trt/converters/interpolate.py @@ -5,6 +5,13 @@ import collections +def has_interpolate_plugin(): + try: + from torch2trt.plugins import InterpolatePlugin + return True + except: + return False + def get_interpolate_plugin(size, mode, align_corners): from torch2trt.plugins import InterpolatePlugin PLUGIN_NAME = 'interpolate' @@ -14,7 +21,7 @@ def get_interpolate_plugin(size, mode, align_corners): return creator.deserialize_plugin(PLUGIN_NAME, torch2trt_plugin.serializeToString()) -@tensorrt_converter('torch.nn.functional.interpolate', enabled=trt_version() < '7.1' and torch.__version__ >= '1.3') +@tensorrt_converter('torch.nn.functional.interpolate', enabled=trt_version() < '7.1' and has_interpolate_plugin()) def convert_interpolate_plugin(ctx): input = ctx.method_args[0] input_trt = trt_(ctx.network, input) @@ -95,26 +102,26 @@ def forward(self, x): return F.interpolate(x, self.size, mode=self.mode, align_corners=self.align_corners) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1' and torch.__version__ >= '1.3') +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1' and has_interpolate_plugin()) def test_interpolate_nearest(): return Interpolate((224, 224), 'nearest', None) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1' and torch.__version__ >= '1.3') +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1' and has_interpolate_plugin()) def test_interpolate_bilinear(): return Interpolate((224, 224), 'bilinear', False) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1' and torch.__version__ >= '1.3') +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1' and has_interpolate_plugin()) def test_interpolate_bicubic(): return Interpolate((224, 224), 'bicubic', False) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1' and torch.__version__ >= '1.3') +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1' and has_interpolate_plugin()) def test_interpolate_area(): return Interpolate((56, 56), 'area', None) -@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1' and torch.__version__ >= '1.3') +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], enabled=trt_version() < '7.1' and has_interpolate_plugin()) def test_upsample_scale_factor2(): return nn.Upsample(scale_factor=2, mode='bilinear',align_corners=False) From 75fd8e2f765bba3e9f03e7516540950bf5f09f1b Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 11 Jun 2020 18:17:28 +0000 Subject: [PATCH 288/355] allowed plugin loading to silent fail --- torch2trt/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/torch2trt/__init__.py b/torch2trt/__init__.py index 
ddc1608e..fa80d89b 100644 --- a/torch2trt/__init__.py +++ b/torch2trt/__init__.py @@ -10,5 +10,7 @@ def load_plugins(): for c in torch2trt_creators: registry.register_creator(c, 'torch2trt') - -load_plugins() \ No newline at end of file +try: + load_plugins() +except: + pass From 22e1118426ca16463d57b97000798e9debc853ee Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 11 Jun 2020 19:20:39 +0000 Subject: [PATCH 289/355] added ONNX conversion alternative to torch2trt --- torch2trt/test.py | 37 +++++++++++++++++++------ torch2trt/torch2trt.py | 63 ++++++++++++++++++++++++++++++------------ 2 files changed, 74 insertions(+), 26 deletions(-) diff --git a/torch2trt/test.py b/torch2trt/test.py index 5578b882..de5046ba 100644 --- a/torch2trt/test.py +++ b/torch2trt/test.py @@ -19,6 +19,7 @@ def run(self): for shape in self.input_shapes: inputs_conversion += (torch.zeros(shape).to(self.device).type(self.dtype), ) + # convert module module_trt = torch2trt(module, inputs_conversion, max_workspace_size=1 << 20, **self.torch2trt_kwargs) @@ -98,11 +99,13 @@ def run(self): parser.add_argument('--name', help='Regular expression to filter modules to test by name', type=str, default='.*') parser.add_argument('--tolerance', help='Maximum error to print warning for entry', type=float, default='-1') parser.add_argument('--include', help='Addition python file to include defining additional tests', action='append', default=[]) + parser.add_argument('--use_onnx', help='Whether to test using ONNX or torch2trt tracing', action='store_true') args = parser.parse_args() for include in args.include: runpy.run_module(include) + num_tests, num_success, num_tolerance, num_error = 0, 0, 0, 0 for test in MODULE_TESTS: # filter by module name @@ -110,16 +113,32 @@ def run(self): if not re.search(args.name, name): continue + num_tests += 1 # run test - max_error, fps, fps_trt, ms, ms_trt = run(test) - - # write entry - line = '| %s | %s | %s | %s | %.2E | %.3g | %.3g | %.3g | %.3g |' % (name, test.dtype.__repr__().split('.')[-1], str(test.input_shapes), str(test.torch2trt_kwargs), max_error, fps, fps_trt, ms, ms_trt) - - if args.tolerance >= 0 and max_error > args.tolerance: - print(colored(line, 'yellow')) - else: - print(line) + try: + if args.use_onnx: + test.torch2trt_kwargs.update({'use_onnx': True}) + + max_error, fps, fps_trt, ms, ms_trt = run(test) + # write entry + line = '| %s | %s | %s | %s | %.2E | %.3g | %.3g | %.3g | %.3g |' % (name, test.dtype.__repr__().split('.')[-1], str(test.input_shapes), str(test.torch2trt_kwargs), max_error, fps, fps_trt, ms, ms_trt) + + if args.tolerance >= 0 and max_error > args.tolerance: + print(colored(line, 'yellow')) + num_tolerance += 1 + else: + print(line) + num_success += 1 + except: + line = '| %s | %s | %s | %s | N/A | N/A | N/A | N/A | N/A |' % (name, test.dtype.__repr__().split('.')[-1], str(test.input_shapes), str(test.torch2trt_kwargs)) + print(colored(line, 'red')) + num_error += 1 + with open(args.output, 'a+') as f: f.write(line + '\n') + + print('NUM_TESTS: %d' % num_tests) + print('NUM_SUCCESSFUL_CONVERSION: %d' % num_success) + print('NUM_FAILED_CONVERSION: %d' % num_error) + print('NUM_ABOVE_TOLERANCE: %d' % num_tolerance) \ No newline at end of file diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index 8ba79cb5..5d4c8c70 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -2,6 +2,7 @@ import tensorrt as trt from copy import copy import numpy as np +import io from .calibration import ( TensorBatchDataset, @@ -255,7 +256,12 @@ def 
__exit__(self, type, val, tb): if self.method_impl: self._set_method(self.method_impl) +def default_input_names(num_inputs): + return ["input_%d" % i for i in range(num_inputs)] +def default_output_names(num_outputs): + return ["output_%d" % i for i in range(num_outputs)] + class ConversionContext(object): def __init__(self, network, converters=CONVERTERS): self.network = network @@ -279,7 +285,7 @@ def __exit__(self, type, val, tb): def add_inputs(self, torch_inputs, names=None): if names is None: - names = ["input_%d" % i for i in range(len(torch_inputs))] + names = default_input_names(len(torch_inputs)) self.input_names = names for i, torch_input in enumerate(torch_inputs): @@ -294,7 +300,7 @@ def add_inputs(self, torch_inputs, names=None): def mark_outputs(self, torch_outputs, names=None): if names is None: - names = ["output_%d" % i for i in range(len(torch_outputs))] + names = default_output_names(len(torch_outputs)) self.output_names = names for i, torch_output in enumerate(torch_outputs): @@ -372,7 +378,7 @@ def enable_profiling(self): if not self.context.profiler: self.context.profiler = trt.Profiler() - + def torch2trt(module, inputs, input_names=None, @@ -385,7 +391,8 @@ def torch2trt(module, keep_network=True, int8_mode=False, int8_calib_dataset=None, - int8_calib_algorithm=DEFAULT_CALIBRATION_ALGORITHM): + int8_calib_algorithm=DEFAULT_CALIBRATION_ALGORITHM, + use_onnx=False): inputs_in = inputs @@ -394,21 +401,43 @@ def torch2trt(module, logger = trt.Logger(log_level) builder = trt.Builder(logger) - network = builder.create_network() - - with ConversionContext(network) as ctx: + + if isinstance(inputs, list): + inputs = tuple(inputs) + if not isinstance(inputs, tuple): + inputs = (inputs,) + + # run once to get num outputs + outputs = module(*inputs) + if not isinstance(outputs, tuple) and not isinstance(outputs, list): + outputs = (outputs,) + + if input_names is None: + input_names = default_input_names(len(inputs)) + if output_names is None: + output_names = default_output_names(len(outputs)) + + if use_onnx: + + f = io.BytesIO() + torch.onnx.export(module, inputs, f, input_names=input_names, output_names=output_names) + f.seek(0) + onnx_bytes = f.read() + network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) + parser = trt.OnnxParser(network, logger) + parser.parse(onnx_bytes) + + else: + network = builder.create_network() + with ConversionContext(network) as ctx: - if isinstance(inputs, list): - inputs = tuple(inputs) - if not isinstance(inputs, tuple): - inputs = (inputs,) - ctx.add_inputs(inputs, input_names) + ctx.add_inputs(inputs, input_names) - outputs = module(*inputs) + outputs = module(*inputs) - if not isinstance(outputs, tuple) and not isinstance(outputs, list): - outputs = (outputs,) - ctx.mark_outputs(outputs, output_names) + if not isinstance(outputs, tuple) and not isinstance(outputs, list): + outputs = (outputs,) + ctx.mark_outputs(outputs, output_names) builder.max_workspace_size = max_workspace_size builder.fp16_mode = fp16_mode @@ -430,7 +459,7 @@ def torch2trt(module, engine = builder.build_cuda_engine(network) - module_trt = TRTModule(engine, ctx.input_names, ctx.output_names) + module_trt = TRTModule(engine, input_names, output_names) if keep_network: module_trt.network = network From b0a7724fdcae6af01f96b894a3c3d9df991bd96e Mon Sep 17 00:00:00 2001 From: Mike Vella Date: Thu, 2 Jul 2020 15:47:09 +0100 Subject: [PATCH 290/355] Add requirements.txt --- requirements.txt | 6 ++++++ 1 file changed, 6 insertions(+) 
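The ONNX alternative above is opt-in at both entry points: `use_onnx=True` on `torch2trt`, or the new `--use_onnx` switch on the module test runner. A minimal sketch of the Python-side toggle (alexnet and the input shape are chosen for illustration; any traceable model works the same way):

```python
import torch
import torchvision
from torch2trt import torch2trt

model = torchvision.models.alexnet(pretrained=True).eval().cuda()
x = torch.ones((1, 3, 224, 224)).cuda()

# Route conversion through torch.onnx.export + trt.OnnxParser
# instead of the method-tracing converters:
model_trt = torch2trt(model, [x], use_onnx=True)
```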
create mode 100644 requirements.txt

diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 00000000..3ef10d8a
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,6 @@
+future==0.18.2
+numpy==1.19.0
+Pillow==7.2.0
+pkg-resources==0.0.0
+torch==1.5.1
+torchvision==0.6.1

From 2aa01dd194fa4baeaae1212c872782d25c447bd2 Mon Sep 17 00:00:00 2001
From: Mike Vella
Date: Thu, 2 Jul 2020 15:49:36 +0100
Subject: [PATCH 291/355] Added install information

Added information regarding installing dependencies
---
 README.md | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 6e41848f..8dfd772f 100644
--- a/README.md
+++ b/README.md
@@ -104,11 +104,16 @@ We tested the converter against these models using the [test.sh](test.sh) script
 To install without compiling plugins, call the following
 
 ```bash
+pip install -r requirements.txt
 git clone https://github.com/NVIDIA-AI-IOT/torch2trt
 cd torch2trt
-sudo python setup.py install
+python setup.py install
 ```
 
+The tensorRT python API is also required, this can be obtained from the [Nvidia Developer Zone](https://developer.nvidia.com/nvidia-tensorrt-7x-download)
+
+
+
 ### Option 2 - With plugins (experimental)
 
 To install with plugins to support some operations in PyTorch that are not natviely supported with TensorRT, call the following

From ed37a06de415addaf079b02e7f50851c9a714295 Mon Sep 17 00:00:00 2001
From: Mike Vella
Date: Mon, 6 Jul 2020 09:55:56 +0100
Subject: [PATCH 292/355] Update README.md

Fixed error in instructions
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 8dfd772f..d599216c 100644
--- a/README.md
+++ b/README.md
@@ -104,9 +104,9 @@ We tested the converter against these models using the [test.sh](test.sh) script
 To install without compiling plugins, call the following
 
 ```bash
-pip install -r requirements.txt
 git clone https://github.com/NVIDIA-AI-IOT/torch2trt
 cd torch2trt
+pip install -r requirements.txt
 python setup.py install
 ```
 

From f7ff4677fbcc23502a6a04e466ca3e261bc4de12 Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Wed, 8 Jul 2020 14:05:05 -0700
Subject: [PATCH 293/355] readme tensorrt install tip

---
 README.md        | 8 ++------
 requirements.txt | 6 ------
 2 files changed, 2 insertions(+), 12 deletions(-)
 delete mode 100644 requirements.txt

diff --git a/README.md b/README.md
index d599216c..4c5d0fc6 100644
--- a/README.md
+++ b/README.md
@@ -99,6 +99,8 @@ We tested the converter against these models using the [test.sh](test.sh) script
 ## Setup
 
+> torch2trt depends on the TensorRT Python API. On Jetson, this is included with the latest JetPack. For desktop, please follow the [TensorRT Installation Guide](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html). You may also try installing torch2trt inside one of the NGC PyTorch docker containers for [Desktop](https://ngc.nvidia.com/catalog/containers/nvidia:pytorch) or [Jetson](https://ngc.nvidia.com/catalog/containers/nvidia:l4t-pytorch).
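Since the note above makes TensorRT's Python API a hard prerequisite, a one-line sanity check before running either install option can save a failed build (this assumes one of the JetPack or NGC environments mentioned above):

```python
# Verify the TensorRT Python bindings resolve before installing torch2trt:
import tensorrt as trt
print(trt.__version__)
```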
+ ### Option 1 - Without plugins To install without compiling plugins, call the following @@ -106,14 +108,9 @@ To install without compiling plugins, call the following ```bash git clone https://github.com/NVIDIA-AI-IOT/torch2trt cd torch2trt -pip install -r requirements.txt python setup.py install ``` -The tensorRT python API is also required, this can be obtained from the [Nvidia Developer Zone](https://developer.nvidia.com/nvidia-tensorrt-7x-download) - - - ### Option 2 - With plugins (experimental) To install with plugins to support some operations in PyTorch that are not natviely supported with TensorRT, call the following @@ -121,7 +118,6 @@ To install with plugins to support some operations in PyTorch that are not natvi > Please note, this currently only includes the interpolate plugin. This plugin requires PyTorch 1.3+ for serialization. ```bash -sudo apt-get install libprotobuf* protobuf-compiler ninja-build git clone https://github.com/NVIDIA-AI-IOT/torch2trt cd torch2trt sudo python setup.py install --plugins diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 3ef10d8a..00000000 --- a/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -future==0.18.2 -numpy==1.19.0 -Pillow==7.2.0 -pkg-resources==0.0.0 -torch==1.5.1 -torchvision==0.6.1 From f9828ccfdf35ee24719d0bc0b00cc98fc66a7be5 Mon Sep 17 00:00:00 2001 From: Kshitij Srivastava Date: Tue, 25 Aug 2020 10:32:32 -0400 Subject: [PATCH 294/355] added preliminary code for narrow --- narrow_test.py | 20 +++++++++++++++++++ torch2trt/converters/__init__.py | 1 + torch2trt/converters/narrow.py | 33 ++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+) create mode 100644 narrow_test.py create mode 100644 torch2trt/converters/narrow.py diff --git a/narrow_test.py b/narrow_test.py new file mode 100644 index 00000000..25683ecc --- /dev/null +++ b/narrow_test.py @@ -0,0 +1,20 @@ +import torch +class Foo(torch.nn.Module): + def __init__(self,dim,start,length): + super().__init__() + self.start = start + self.dim=dim + self.length=length + def forward(self, x): + return torch.narrow(x,self.dim,self.start,self.length) + +if __name__ == "__main__": + model = Foo(2,0,50).eval().cuda() + x = torch.randn([1,3,224,224], device='cuda') + y = model(x) + + from torch2trt import torch2trt + model_trt = torch2trt(model, [x]) + y_trt = model_trt(x) + print(y.size()) + print(y_trt.size()) diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index 2bb0c828..7e50ab3e 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -38,6 +38,7 @@ from .min import * from .mul import * from .normalize import * +from .narrow import * from .pad import * from .permute import * from .pow import * diff --git a/torch2trt/converters/narrow.py b/torch2trt/converters/narrow.py new file mode 100644 index 00000000..4eda4db1 --- /dev/null +++ b/torch2trt/converters/narrow.py @@ -0,0 +1,33 @@ +import tensorrt as trt +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +@tensorrt_converter('torch.narrow') +def convert_narrow(ctx): + inputs = get_arg(ctx, 'input', pos=0, default=None) + dim = get_arg(ctx, 'dim', pos=1, default=0) + start = get_arg(ctx, 'start', pos=2, default=None) + print("start",start, type(start)) + input_trt= trt_(ctx.network, inputs) + + output = ctx.method_return + output_shape = list(output.size()) + #print(type(trt.Dims(start)),type(trt.Tensorrt.Dims(start))) + print(type(trt.tensorrt.Dims(output_shape))) + layer = 
ctx.network.add_slice(inputs=input_trt,shape=trt.tensorrt.Dims(output_shape)) + output._trt = layer.get_output(0) + +class narrow(torch.nn.Module): + def __init__(self, dim, start, length): + super(Cat, self).__init__() + self.dim = dim + self.start = start + self.length = length + + def forward(self, *x): + return torch.narrow(x, dim=self.dim,start=self.start,length=self.length) + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 4, 4), (1, 3, 4), (1, 17, 4)]) +def test_narrow_basic(): + return narrow(1,0,2) From 5c4ad8fe96ea3596f2a15dfb3f43cfcdb847169c Mon Sep 17 00:00:00 2001 From: Kshitij Srivastava Date: Fri, 28 Aug 2020 11:48:10 -0400 Subject: [PATCH 295/355] narrow converter added successfully --- narrow_test.py | 20 ----------------- torch2trt/converters/narrow.py | 41 ++++++++++++++++++++-------------- 2 files changed, 24 insertions(+), 37 deletions(-) delete mode 100644 narrow_test.py diff --git a/narrow_test.py b/narrow_test.py deleted file mode 100644 index 25683ecc..00000000 --- a/narrow_test.py +++ /dev/null @@ -1,20 +0,0 @@ -import torch -class Foo(torch.nn.Module): - def __init__(self,dim,start,length): - super().__init__() - self.start = start - self.dim=dim - self.length=length - def forward(self, x): - return torch.narrow(x,self.dim,self.start,self.length) - -if __name__ == "__main__": - model = Foo(2,0,50).eval().cuda() - x = torch.randn([1,3,224,224], device='cuda') - y = model(x) - - from torch2trt import torch2trt - model_trt = torch2trt(model, [x]) - y_trt = model_trt(x) - print(y.size()) - print(y_trt.size()) diff --git a/torch2trt/converters/narrow.py b/torch2trt/converters/narrow.py index 4eda4db1..dcf1d696 100644 --- a/torch2trt/converters/narrow.py +++ b/torch2trt/converters/narrow.py @@ -2,32 +2,39 @@ from torch2trt.torch2trt import * from torch2trt.module_test import add_module_test - +@tensorrt_converter('torch.Tensor.narrow') @tensorrt_converter('torch.narrow') def convert_narrow(ctx): - inputs = get_arg(ctx, 'input', pos=0, default=None) - dim = get_arg(ctx, 'dim', pos=1, default=0) + inputs = get_arg(ctx, 'input', pos=0, default=None) start = get_arg(ctx, 'start', pos=2, default=None) - print("start",start, type(start)) - input_trt= trt_(ctx.network, inputs) - output = ctx.method_return - output_shape = list(output.size()) - #print(type(trt.Dims(start)),type(trt.Tensorrt.Dims(start))) - print(type(trt.tensorrt.Dims(output_shape))) - layer = ctx.network.add_slice(inputs=input_trt,shape=trt.tensorrt.Dims(output_shape)) + shape = list(inputs.shape) + start = [0]*len(shape) + stride = [1]*len(shape) + dim = ctx.method_args[1] if get_arg(ctx, 'dim', pos=1, default=0) >=0 else len(shape)+get_arg(ctx, 'dim', pos=1, default=0) + start[dim] = ctx.method_args[2] + shape[dim] = ctx.method_args[3] + # not consider batch dimension + input_trt = trt_(ctx.network,inputs) + layer = ctx.network.add_slice(input=input_trt,start=start[1:], shape=shape[1:],stride=stride[1:]) output._trt = layer.get_output(0) -class narrow(torch.nn.Module): +class Narrow(torch.nn.Module): def __init__(self, dim, start, length): - super(Cat, self).__init__() + super(Narrow, self).__init__() self.dim = dim self.start = start self.length = length - def forward(self, *x): - return torch.narrow(x, dim=self.dim,start=self.start,length=self.length) + def forward(self, x): + return torch.narrow(x,self.dim,self.start,self.length) + +@add_module_test(torch.float32, torch.device('cuda'), [(1,3,224,224)]) +def test_narrow1(): + return Narrow(1,0,2) + +@add_module_test(torch.float32, 
torch.device('cuda'), [(1,3,224,224)]) +def test_narrow2(): + return Narrow(2,0,50) + -@add_module_test(torch.float32, torch.device('cuda'), [(1, 4, 4), (1, 3, 4), (1, 17, 4)]) -def test_narrow_basic(): - return narrow(1,0,2) From f3fe46c97872bcf147f33fb3340812e0bb66d6b1 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Mon, 31 Aug 2020 21:56:04 +0000 Subject: [PATCH 296/355] softmax neg dim --- torch2trt/converters/softmax.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/torch2trt/converters/softmax.py b/torch2trt/converters/softmax.py index 74c56d72..1b9d8441 100644 --- a/torch2trt/converters/softmax.py +++ b/torch2trt/converters/softmax.py @@ -13,6 +13,12 @@ def convert_softmax(ctx): dim = ctx.method_kwargs['dim'] elif len(ctx.method_args) >= 2: dim = ctx.method_args[1] + + # convert negative dims +# import pdb +# pdb.set_trace() + if dim < 0: + dim = input.ndim + dim axes = 1 << (dim - 1) @@ -31,3 +37,14 @@ def test_softmax_module(): @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) def test_softmax_module_dim2(): return torch.nn.Softmax(2) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +def test_softmax_module_neg1(): + return torch.nn.Softmax(-1) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) +def test_softmax_module_dim_neg2(): + return torch.nn.Softmax(-2) \ No newline at end of file From d245496556aceb9510c280c8740dd3f121f5e10c Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 1 Sep 2020 21:27:54 +0000 Subject: [PATCH 297/355] added add_missing_trt_tensors and broadcast_trt_tensors --- torch2trt/converters/activation.py | 10 ++--- torch2trt/torch2trt.py | 63 ++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 5 deletions(-) diff --git a/torch2trt/converters/activation.py b/torch2trt/converters/activation.py index 6a10f365..63da5ae8 100644 --- a/torch2trt/converters/activation.py +++ b/torch2trt/converters/activation.py @@ -18,7 +18,7 @@ def convert_leaky_relu(ctx): negative_slope = get_arg(ctx, 'negative_slope', pos=1, default=0.01) output = ctx.method_return - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] layer = ctx.network.add_activation(input_trt, trt.ActivationType.LEAKY_RELU) layer.alpha = negative_slope @@ -40,7 +40,7 @@ def convert_elu(ctx): alpha = get_arg(ctx, 'alpha', pos=1, default=1.0) output = ctx.method_return - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] layer = ctx.network.add_activation(input_trt, trt.ActivationType.ELU) layer.alpha = alpha @@ -63,7 +63,7 @@ def convert_selu(ctx): alpha = get_arg(ctx, 'alpha', pos=1, default=1.0) output = ctx.method_return - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] layer = ctx.network.add_activation(input_trt, trt.ActivationType.SELU) layer.alpha = 1.6732632423543772848170429916717 layer.beta = 1.0507009873554804934193349852946 @@ -84,7 +84,7 @@ def convert_softsign(ctx): input = get_arg(ctx, 'input', pos=0, default=None) output = ctx.method_return - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] layer = ctx.network.add_activation(input_trt, trt.ActivationType.SOFTSIGN) output._trt = layer.get_output(0) @@ -103,7 +103,7 @@ def convert_softplus(ctx): input = get_arg(ctx, 'input', pos=0, default=None) output = ctx.method_return - 
input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] layer = ctx.network.add_activation(input_trt, trt.ActivationType.SOFTPLUS) output._trt = layer.get_output(0) diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index 5d4c8c70..98df3ff4 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -114,7 +114,70 @@ def check_torch_dtype(*tensors): ) # , 'Data type could not be inferred from any item in list') return dtype + +def add_missing_trt_tensors(network, tensors): + """Creates missing TensorRT tensors as constants and attaches them to the Torch Tensors""" + trt_tensors = [None] * len(tensors) + + dtype = check_torch_dtype(*tensors) + + for i, t in enumerate(tensors): + trt_tensor = None + + # GET TRT TENSOR (OR CREATE TRT CONSTANT) + + # get tensor w/ _trt + if isinstance(t, torch.Tensor) and hasattr(t, "_trt"): + trt_tensor = t._trt + # or... add constant for leaf tensor w/o _trt + elif isinstance(t, torch.Tensor) and not hasattr(t, "_trt"): + + # remove all preceding ones, these can be re-inserted later when broadcasting + num_preceding_ones = 0 + for i in range(t.ndim): + if int(t.shape[i]) == 1: + num_preceding_ones += 1 + shape = tuple(t.shape[num_preceding_ones:]) + + weight = t.detach().cpu().numpy() + t._trt = network.add_constant(shape, weight).get_output(0) + trt_tensor = t._trt + + # or... add constant for scalar primitive + elif isinstance(t, float) or isinstance(t, int): + shape = (1,) + scalar = t * torch.ones(shape, dtype=dtype).cpu().numpy() + trt_tensor = network.add_constant(shape, scalar).get_output(0) + + assert trt_tensor is not None + + trt_tensors[i] = trt_tensor + + return trt_tensors + + +def broadcast_trt_tensors(network, trt_tensors, broadcast_ndim): + """Broadcast TensorRT tensors to the specified dimension by pre-padding shape 1 dims""" + broadcasted_trt_tensors = [None] * len(tensors) + + for i, t in enumerate(trt_tensors): + + if len(trt_tensor.shape) < broadcast_ndim: + # append 1 size dims to front + diff = broadcast_ndim - len(trt_tensor.shape) + shape = tuple([1] * diff + list(trt_tensor.shape)) + layer = network.add_shuffle(trt_tensor) + layer.reshape_dims = shape + trt_tensor = layer.get_output(0) + else: + trt_tensor = t + + broadcasted_trt_tensors[i] = trt_tensor + + return broadcast_trt_tensors + + def trt_(network, *tensors): """Creates missing TensorRT tensors and adds shuffle layers to make tensors broadcastable""" trt_tensors = [None] * len(tensors) From fb8da9a6df3fd18fad11ff6e1756f8799f4d3713 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 1 Sep 2020 21:36:10 +0000 Subject: [PATCH 298/355] bugfix and add --- torch2trt/converters/AdaptiveAvgPool2d.py | 2 +- torch2trt/converters/add.py | 3 ++- torch2trt/torch2trt.py | 12 ++++++------ 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/torch2trt/converters/AdaptiveAvgPool2d.py b/torch2trt/converters/AdaptiveAvgPool2d.py index b92fb975..41ad141d 100644 --- a/torch2trt/converters/AdaptiveAvgPool2d.py +++ b/torch2trt/converters/AdaptiveAvgPool2d.py @@ -8,7 +8,7 @@ def convert_AdaptiveAvgPool2d(ctx): input = ctx.method_args[1] output = ctx.method_return - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output_size = module.output_size if not isinstance(output_size, tuple): diff --git a/torch2trt/converters/add.py b/torch2trt/converters/add.py index 0ca9e095..b6355a75 100644 --- a/torch2trt/converters/add.py +++ b/torch2trt/converters/add.py @@ -9,8 +9,9 @@ def 
convert_add(ctx): input_a = ctx.method_args[0] input_b = ctx.method_args[1] - input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b) output = ctx.method_return + input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) + input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], output.ndim - 1) layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.SUM) output._trt = layer.get_output(0) diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index 98df3ff4..a2faa885 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -159,15 +159,15 @@ def add_missing_trt_tensors(network, tensors): def broadcast_trt_tensors(network, trt_tensors, broadcast_ndim): """Broadcast TensorRT tensors to the specified dimension by pre-padding shape 1 dims""" - broadcasted_trt_tensors = [None] * len(tensors) + broadcasted_trt_tensors = [None] * len(trt_tensors) for i, t in enumerate(trt_tensors): - if len(trt_tensor.shape) < broadcast_ndim: + if len(t.shape) < broadcast_ndim: # append 1 size dims to front - diff = broadcast_ndim - len(trt_tensor.shape) - shape = tuple([1] * diff + list(trt_tensor.shape)) - layer = network.add_shuffle(trt_tensor) + diff = broadcast_ndim - len(t.shape) + shape = tuple([1] * diff + list(t.shape)) + layer = network.add_shuffle(t) layer.reshape_dims = shape trt_tensor = layer.get_output(0) else: @@ -175,7 +175,7 @@ def broadcast_trt_tensors(network, trt_tensors, broadcast_ndim): broadcasted_trt_tensors[i] = trt_tensor - return broadcast_trt_tensors + return broadcasted_trt_tensors def trt_(network, *tensors): From d6e6f3b22c33e03384cd794fd71b35cdb580b69d Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 1 Sep 2020 21:47:14 +0000 Subject: [PATCH 299/355] add tests --- torch2trt/converters/add.py | 30 +++++++++++++++++++++++++++++- torch2trt/test.py | 3 +++ torch2trt/torch2trt.py | 4 ++-- 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/torch2trt/converters/add.py b/torch2trt/converters/add.py index b6355a75..62c93e7e 100644 --- a/torch2trt/converters/add.py +++ b/torch2trt/converters/add.py @@ -78,4 +78,32 @@ def forward(self, x): @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) def test_add_radd_float(): - return RAddFloat() \ No newline at end of file + return RAddFloat() + + +class AddConstantNoBatch(torch.nn.Module): + def __init__(self): + super(AddConstantNoBatch, self).__init__() + self.register_buffer('y', torch.ones((3, 10, 10))) + + def forward(self, x): + return x + self.y + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10, 10)]) +def test_add_constant_nobatch(): + return AddConstantNoBatch() + + +class AddConstantBatch(torch.nn.Module): + def __init__(self): + super(AddConstantBatch, self).__init__() + self.register_buffer('y', torch.ones((1, 3, 10, 10))) + + def forward(self, x): + return x + self.y + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10, 10)]) +def test_add_constant_batch(): + return AddConstantBatch() \ No newline at end of file diff --git a/torch2trt/test.py b/torch2trt/test.py index de5046ba..dec9bb88 100644 --- a/torch2trt/test.py +++ b/torch2trt/test.py @@ -4,6 +4,7 @@ import argparse import re import runpy +import traceback from termcolor import colored @@ -134,6 +135,8 @@ def run(self): line = '| %s | %s | %s | %s | N/A | N/A | N/A | N/A | N/A |' % (name, test.dtype.__repr__().split('.')[-1], str(test.input_shapes), str(test.torch2trt_kwargs)) 
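+            # a failed conversion is reported as an N/A row (printed in red below)
+            # and the full traceback is printed so the underlying converter error is visible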
print(colored(line, 'red')) num_error += 1 + tb = traceback.format_exc() + print(tb) with open(args.output, 'a+') as f: f.write(line + '\n') diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index a2faa885..25b4fa67 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -135,8 +135,8 @@ def add_missing_trt_tensors(network, tensors): # remove all preceding ones, these can be re-inserted later when broadcasting num_preceding_ones = 0 - for i in range(t.ndim): - if int(t.shape[i]) == 1: + for j in range(t.ndim): + if int(t.shape[j]) == 1: num_preceding_ones += 1 shape = tuple(t.shape[num_preceding_ones:]) From d14c6b7955adee9847435df6bd3d79e62ce34649 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 1 Sep 2020 22:01:05 +0000 Subject: [PATCH 300/355] div --- torch2trt/converters/BatchNorm1d.py | 2 +- torch2trt/converters/BatchNorm2d.py | 2 +- torch2trt/converters/Conv.py | 2 +- torch2trt/converters/Conv1d.py | 2 +- torch2trt/converters/Conv2d.py | 2 +- torch2trt/converters/ConvTranspose.py | 2 +- torch2trt/converters/ConvTranspose2d.py | 2 +- torch2trt/converters/avg_pool.py | 4 +-- torch2trt/converters/batch_norm.py | 2 +- torch2trt/converters/cat.py | 3 ++- torch2trt/converters/clamp.py | 14 +++++----- torch2trt/converters/compare.py | 3 ++- torch2trt/converters/div.py | 36 ++++++++++++++++++++++--- 13 files changed, 55 insertions(+), 21 deletions(-) diff --git a/torch2trt/converters/BatchNorm1d.py b/torch2trt/converters/BatchNorm1d.py index 89bda117..52249db1 100644 --- a/torch2trt/converters/BatchNorm1d.py +++ b/torch2trt/converters/BatchNorm1d.py @@ -6,7 +6,7 @@ def convert_BatchNorm2d(ctx): module = ctx.method_args[0] input = ctx.method_args[1] - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return scale = module.weight.detach().cpu().numpy() / np.sqrt(module.running_var.detach().cpu().numpy() + module.eps) diff --git a/torch2trt/converters/BatchNorm2d.py b/torch2trt/converters/BatchNorm2d.py index b13349dd..77d5af31 100644 --- a/torch2trt/converters/BatchNorm2d.py +++ b/torch2trt/converters/BatchNorm2d.py @@ -6,7 +6,7 @@ def convert_BatchNorm2d(ctx): module = ctx.method_args[0] input = ctx.method_args[1] - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return scale = module.weight.detach().cpu().numpy() / np.sqrt( diff --git a/torch2trt/converters/Conv.py b/torch2trt/converters/Conv.py index 95cf7bde..512cddbc 100644 --- a/torch2trt/converters/Conv.py +++ b/torch2trt/converters/Conv.py @@ -7,7 +7,7 @@ def convert_Conv_trt7(ctx): module = ctx.method_args[0] input = ctx.method_args[1] - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return input_dim = input.dim() - 2 diff --git a/torch2trt/converters/Conv1d.py b/torch2trt/converters/Conv1d.py index fe6cf189..2f54b695 100644 --- a/torch2trt/converters/Conv1d.py +++ b/torch2trt/converters/Conv1d.py @@ -6,7 +6,7 @@ def convert_Conv1d(ctx): module = ctx.method_args[0] input = ctx.method_args[1] - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return kernel_size = (module.kernel_size[0], 1) diff --git a/torch2trt/converters/Conv2d.py b/torch2trt/converters/Conv2d.py index 40af1cdc..d37f3de2 100644 --- a/torch2trt/converters/Conv2d.py +++ b/torch2trt/converters/Conv2d.py @@ -6,7 +6,7 @@ def convert_Conv2d(ctx): module = 
ctx.method_args[0] input = ctx.method_args[1] - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return kernel_size = module.kernel_size diff --git a/torch2trt/converters/ConvTranspose.py b/torch2trt/converters/ConvTranspose.py index 1f88d46a..4def33ce 100644 --- a/torch2trt/converters/ConvTranspose.py +++ b/torch2trt/converters/ConvTranspose.py @@ -7,7 +7,7 @@ def convert_ConvTranspose2d_trt7(ctx): module = ctx.method_args[0] input = ctx.method_args[1] - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return input_dim = input.dim() - 2 diff --git a/torch2trt/converters/ConvTranspose2d.py b/torch2trt/converters/ConvTranspose2d.py index 05dee7fc..719b608d 100644 --- a/torch2trt/converters/ConvTranspose2d.py +++ b/torch2trt/converters/ConvTranspose2d.py @@ -5,7 +5,7 @@ def convert_ConvTranspose2d(ctx): module = ctx.method_args[0] input = ctx.method_args[1] - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return kernel_size = module.kernel_size diff --git a/torch2trt/converters/avg_pool.py b/torch2trt/converters/avg_pool.py index ec22f76d..185af508 100644 --- a/torch2trt/converters/avg_pool.py +++ b/torch2trt/converters/avg_pool.py @@ -13,7 +13,7 @@ def convert_avg_pool2d(ctx): count_include_pad = get_arg(ctx, "count_include_pad", pos=5, default=True) # get input trt tensor (or create constant if it doesn't exist) - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return @@ -55,7 +55,7 @@ def convert_avg_pool_trt7(ctx): count_include_pad = get_arg(ctx, 'count_include_pad', pos=5, default=True) # get input trt tensor (or create constant if it doesn't exist) - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return input_dim = input.dim() - 2 diff --git a/torch2trt/converters/batch_norm.py b/torch2trt/converters/batch_norm.py index 50ba62f6..30e7582f 100644 --- a/torch2trt/converters/batch_norm.py +++ b/torch2trt/converters/batch_norm.py @@ -12,7 +12,7 @@ def convert_batch_norm_trt7(ctx): bias = get_arg(ctx, 'bias', pos=4, default=None) eps = get_arg(ctx, 'eps', pos=7, default=10e-6) - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return scale = weight.detach().cpu().numpy() / np.sqrt(running_var.detach().cpu().numpy() + eps) diff --git a/torch2trt/converters/cat.py b/torch2trt/converters/cat.py index 109fadf3..10d85c34 100644 --- a/torch2trt/converters/cat.py +++ b/torch2trt/converters/cat.py @@ -8,7 +8,8 @@ def convert_cat(ctx): dim = get_arg(ctx, 'dim', pos=1, default=0) output = ctx.method_return - trt_inputs = [trt_(ctx.network, i) for i in inputs] + trt_inputs = add_missing_trt_tensors(ctx.network, inputs) + trt_inputs = broadcast_trt_tensors(ctx.network, trt_inputs, output.ndim - 1) layer = ctx.network.add_concatenation(inputs=trt_inputs) layer.axis = dim - 1 diff --git a/torch2trt/converters/clamp.py b/torch2trt/converters/clamp.py index f326a276..5cc22577 100644 --- a/torch2trt/converters/clamp.py +++ b/torch2trt/converters/clamp.py @@ -20,7 +20,7 @@ def __add_clamp(network, trt_input, val, op): @tensorrt_converter('torch.Tensor.clamp_min') def convert_clamp_min(ctx): input = ctx.method_args[0] - input_trt = trt_(ctx.network, input) + input_trt = 
add_missing_trt_tensors(ctx.network, [input])[0] val = ctx.method_args[1] output = ctx.method_return @@ -56,10 +56,11 @@ def test_tensor_clamp_min(): @tensorrt_converter('torch.Tensor.clamp_max') def convert_clamp_max(ctx): input = ctx.method_args[0] + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] val = ctx.method_args[1] output = ctx.method_return - layer = __add_clamp(ctx.network, input._trt, val, trt.ElementWiseOperation.MIN) + layer = __add_clamp(ctx.network, input_trt, val, trt.ElementWiseOperation.MIN) output._trt = layer.get_output(0) @@ -90,22 +91,23 @@ def test_tensor_clamp_max(): @tensorrt_converter('torch.Tensor.clamp') def convert_clamp(ctx): input = ctx.method_args[0] + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return if "min" in ctx.method_kwargs and "max" in ctx.method_kwargs: min_val = ctx.method_kwargs["min"] max_val = ctx.method_kwargs["max"] - layer = __add_clamp(ctx.network, input._trt, min_val, trt.ElementWiseOperation.MAX) + layer = __add_clamp(ctx.network, input_trt, min_val, trt.ElementWiseOperation.MAX) layer = __add_clamp(ctx.network, layer.get_output(0), max_val, trt.ElementWiseOperation.MIN) elif "min" in ctx.method_kwargs: min_val = ctx.method_kwargs["min"] - layer = __add_clamp(ctx.network, input._trt, min_val, trt.ElementWiseOperation.MAX) + layer = __add_clamp(ctx.network, input_trt, min_val, trt.ElementWiseOperation.MAX) elif "max" in ctx.method_kwargs: max_val = ctx.method_kwargs["max"] - layer = __add_clamp(ctx.network, input._trt, max_val, trt.ElementWiseOperation.MIN) + layer = __add_clamp(ctx.network, input_trt, max_val, trt.ElementWiseOperation.MIN) else: min_val = ctx.method_args[1] max_val = ctx.method_args[2] - layer = __add_clamp(ctx.network, input._trt, min_val, trt.ElementWiseOperation.MAX) + layer = __add_clamp(ctx.network, input_trt, min_val, trt.ElementWiseOperation.MAX) layer = __add_clamp(ctx.network, layer.get_output(0), max_val, trt.ElementWiseOperation.MIN) output._trt = layer.get_output(0) diff --git a/torch2trt/converters/compare.py b/torch2trt/converters/compare.py index aa152ec2..b16f2f0f 100644 --- a/torch2trt/converters/compare.py +++ b/torch2trt/converters/compare.py @@ -4,8 +4,9 @@ def convert_elementwise(ctx, op): input_a = ctx.method_args[0] input_b = ctx.method_args[1] - input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b) output = ctx.method_return + input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) + input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], output.ndim - 1) layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, op) output._trt = layer.get_output(0) diff --git a/torch2trt/converters/div.py b/torch2trt/converters/div.py index 4da1368d..ba2e4f94 100644 --- a/torch2trt/converters/div.py +++ b/torch2trt/converters/div.py @@ -10,8 +10,9 @@ def convert_div(ctx): input_a = ctx.method_args[0] input_b = ctx.method_args[1] - input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b) output = ctx.method_return + input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) + input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], output.ndim - 1) layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.DIV) output._trt = layer.get_output(0) @@ -21,8 +22,9 @@ def convert_div(ctx): def convert_rdiv(ctx): input_a = ctx.method_args[1] # inputs switched for rdiv input_b = ctx.method_args[0] - 
input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b) output = ctx.method_return + input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) + input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], output.ndim - 1) layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.DIV) output._trt = layer.get_output(0) @@ -90,4 +92,32 @@ def forward(self, x): @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) def test_rdiv_float(): - return RDivFloat() \ No newline at end of file + return RDivFloat() + + +class DivConstantNoBatch(torch.nn.Module): + def __init__(self): + super(DivConstantNoBatch, self).__init__() + self.register_buffer('y', torch.ones((3, 10, 10))) + + def forward(self, x): + return x / self.y + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10, 10)]) +def test_div_constant_nobatch(): + return DivConstantNoBatch() + + +class DivConstantBatch(torch.nn.Module): + def __init__(self): + super(DivConstantBatch, self).__init__() + self.register_buffer('y', torch.ones((1, 3, 10, 10))) + + def forward(self, x): + return x / self.y + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10, 10)]) +def test_div_constant_batch(): + return DivConstantBatch() \ No newline at end of file From a08e6dc93a0e2c820be1433bcc26867c0fa897b5 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 1 Sep 2020 22:27:14 +0000 Subject: [PATCH 301/355] dimfix --- torch2trt/converters/Identity.py | 2 +- torch2trt/converters/Linear.py | 2 +- torch2trt/converters/LogSoftmax.py | 2 +- torch2trt/converters/ReLU.py | 2 +- torch2trt/converters/ReLU6.py | 7 +++--- torch2trt/converters/identity.py | 2 +- torch2trt/converters/interpolate.py | 4 ++-- torch2trt/converters/max.py | 5 +++-- torch2trt/converters/max_pool2d.py | 2 +- torch2trt/converters/mean.py | 2 +- torch2trt/converters/min.py | 5 +++-- torch2trt/converters/mul.py | 33 +++++++++++++++++++++++++-- torch2trt/converters/normalize.py | 3 ++- torch2trt/converters/pad.py | 2 +- torch2trt/converters/permute.py | 2 +- torch2trt/converters/pow.py | 6 +++-- torch2trt/converters/prelu.py | 2 +- torch2trt/converters/prod.py | 2 +- torch2trt/converters/sigmoid.py | 2 +- torch2trt/converters/softmax.py | 2 +- torch2trt/converters/split.py | 2 +- torch2trt/converters/sub.py | 35 ++++++++++++++++++++++++++--- torch2trt/converters/sum.py | 4 ++-- torch2trt/converters/tanh.py | 2 +- torch2trt/converters/transpose.py | 4 ++-- torch2trt/converters/unary.py | 2 +- torch2trt/converters/view.py | 2 +- torch2trt/torch2trt.py | 16 +++++++------ 28 files changed, 111 insertions(+), 45 deletions(-) diff --git a/torch2trt/converters/Identity.py b/torch2trt/converters/Identity.py index 761aff50..1934666d 100644 --- a/torch2trt/converters/Identity.py +++ b/torch2trt/converters/Identity.py @@ -6,6 +6,6 @@ @tensorrt_converter('torch.nn.Dropout3d.forward') def convert_Identity(ctx): input = ctx.method_args[1] - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return output._trt = input_trt \ No newline at end of file diff --git a/torch2trt/converters/Linear.py b/torch2trt/converters/Linear.py index 6b896eed..323a474f 100644 --- a/torch2trt/converters/Linear.py +++ b/torch2trt/converters/Linear.py @@ -6,7 +6,7 @@ def convert_Linear(ctx): module = ctx.method_args[0] input = ctx.method_args[1] - input_trt = trt_(ctx.network, input) + input_trt = 
add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return # reshape to ...xNx1x1 diff --git a/torch2trt/converters/LogSoftmax.py b/torch2trt/converters/LogSoftmax.py index 38108780..83ac4a95 100644 --- a/torch2trt/converters/LogSoftmax.py +++ b/torch2trt/converters/LogSoftmax.py @@ -4,7 +4,7 @@ @tensorrt_converter('torch.nn.LogSoftmax.forward') def convert_LogSoftmax(ctx): input = ctx.method_args[1] - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return layer = ctx.network.add_softmax(input=input_trt) layer = ctx.network.add_unary(input=layer.get_output(0), diff --git a/torch2trt/converters/ReLU.py b/torch2trt/converters/ReLU.py index e10feb27..481f4b8a 100644 --- a/torch2trt/converters/ReLU.py +++ b/torch2trt/converters/ReLU.py @@ -4,7 +4,7 @@ @tensorrt_converter('torch.nn.ReLU.forward') def convert_ReLU(ctx): input = ctx.method_args[1] - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return layer = ctx.network.add_activation( input=input_trt, type=trt.ActivationType.RELU) diff --git a/torch2trt/converters/ReLU6.py b/torch2trt/converters/ReLU6.py index 7c9ff588..8b5af6a1 100644 --- a/torch2trt/converters/ReLU6.py +++ b/torch2trt/converters/ReLU6.py @@ -7,12 +7,13 @@ def convert_ReLU6(ctx): input = ctx.method_args[1] output = ctx.method_return - input_trt, trt_6 = trt_(ctx.network, input, 6) + input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input, 6]) + input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], output.ndim - 1) layer = ctx.network.add_activation( - input=input_trt, type=trt.ActivationType.RELU) + input=input_a_trt, type=trt.ActivationType.RELU) layer = ctx.network.add_elementwise( - layer.get_output(0), trt_6, trt.ElementWiseOperation.MIN) + layer.get_output(0), input_b_trt, trt.ElementWiseOperation.MIN) output._trt = layer.get_output(0) diff --git a/torch2trt/converters/identity.py b/torch2trt/converters/identity.py index 45a02fd9..bac1bd99 100644 --- a/torch2trt/converters/identity.py +++ b/torch2trt/converters/identity.py @@ -7,6 +7,6 @@ @tensorrt_converter('torch.nn.functional.dropout3d') def convert_identity(ctx): input = ctx.method_args[0] - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return output._trt = input_trt diff --git a/torch2trt/converters/interpolate.py b/torch2trt/converters/interpolate.py index 96a63d7d..dfa20d19 100644 --- a/torch2trt/converters/interpolate.py +++ b/torch2trt/converters/interpolate.py @@ -24,7 +24,7 @@ def get_interpolate_plugin(size, mode, align_corners): @tensorrt_converter('torch.nn.functional.interpolate', enabled=trt_version() < '7.1' and has_interpolate_plugin()) def convert_interpolate_plugin(ctx): input = ctx.method_args[0] - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return try: @@ -60,7 +60,7 @@ def convert_interpolate_trt7(ctx): input_dim = input.dim() - 2 - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return layer = ctx.network.add_resize(input=input_trt) diff --git a/torch2trt/converters/max.py b/torch2trt/converters/max.py index afcb7954..b7bbe97f 100644 --- a/torch2trt/converters/max.py +++ b/torch2trt/converters/max.py @@ -6,8 +6,9 @@ def __convert_max_elementwise(ctx): input_a = 
ctx.method_args[0] input_b = ctx.method_args[1] - input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b) output = ctx.method_return + input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) + input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], output.ndim - 1) layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.MAX) output._trt = layer.get_output(0) @@ -16,7 +17,7 @@ def __convert_max_reduce(ctx): input = ctx.method_args[0] dim = get_arg(ctx, 'dim', pos=1, default=tuple(range(1, input.ndim))) keepdim = get_arg(ctx, 'keepdim', pos=2, default=False) - input_trt= trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output_val = ctx.method_return[0] output_idx = ctx.method_return[1] layer = ctx.network.add_reduce(input_trt, trt.ReduceOperation.MAX, torch_dim_to_trt_axes(dim), keepdim) diff --git a/torch2trt/converters/max_pool2d.py b/torch2trt/converters/max_pool2d.py index e2fc9859..cc4fddeb 100644 --- a/torch2trt/converters/max_pool2d.py +++ b/torch2trt/converters/max_pool2d.py @@ -13,7 +13,7 @@ def convert_max_pool2d(ctx): ceil_mode = get_arg(ctx, 'ceil_mode', pos=5, default=False) # get input trt tensor (or create constant if it doesn't exist) - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return diff --git a/torch2trt/converters/mean.py b/torch2trt/converters/mean.py index 4ad2a9e1..c75689e4 100644 --- a/torch2trt/converters/mean.py +++ b/torch2trt/converters/mean.py @@ -6,7 +6,7 @@ @tensorrt_converter('torch.Tensor.mean') def convert_mean(ctx): input = ctx.method_args[0] - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return # get dims from args or kwargs diff --git a/torch2trt/converters/min.py b/torch2trt/converters/min.py index 109e77b3..f58d45dc 100644 --- a/torch2trt/converters/min.py +++ b/torch2trt/converters/min.py @@ -6,8 +6,9 @@ def __convert_min_elementwise(ctx): input_a = ctx.method_args[0] input_b = ctx.method_args[1] - input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b) output = ctx.method_return + input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) + input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], output.ndim - 1) layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.MIN) output._trt = layer.get_output(0) @@ -16,7 +17,7 @@ def __convert_min_reduce(ctx): input = ctx.method_args[0] dim = get_arg(ctx, 'dim', pos=1, default=tuple(range(1,input.ndim))) keepdim = get_arg(ctx, 'keepdim', pos=2, default=False) - input_trt= trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output_val = ctx.method_return[0] output_idx = ctx.method_return[1] layer = ctx.network.add_reduce(input_trt, trt.ReduceOperation.MIN, torch_dim_to_trt_axes(dim), keepdim) diff --git a/torch2trt/converters/mul.py b/torch2trt/converters/mul.py index d5f3bb31..8e527817 100644 --- a/torch2trt/converters/mul.py +++ b/torch2trt/converters/mul.py @@ -9,8 +9,9 @@ def convert_mul(ctx): input_a = ctx.method_args[0] input_b = ctx.method_args[1] - input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b) output = ctx.method_return + input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) + input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, 
[input_a_trt, input_b_trt], output.ndim - 1) layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.PROD) output._trt = layer.get_output(0) @@ -76,4 +77,32 @@ def forward(self, x): @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) def test_rmul_float(): - return RMulFloat() \ No newline at end of file + return RMulFloat() + + +class MulConstantNoBatch(torch.nn.Module): + def __init__(self): + super(MulConstantNoBatch, self).__init__() + self.register_buffer('y', torch.ones((3, 10, 10))) + + def forward(self, x): + return x * self.y + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10, 10)]) +def test_mul_constant_nobatch(): + return MulConstantNoBatch() + + +class MulConstantBatch(torch.nn.Module): + def __init__(self): + super(MulConstantBatch, self).__init__() + self.register_buffer('y', torch.ones((1, 3, 10, 10))) + + def forward(self, x): + return x * self.y + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10, 10)]) +def test_mul_constant_batch(): + return MulConstantBatch() \ No newline at end of file diff --git a/torch2trt/converters/normalize.py b/torch2trt/converters/normalize.py index 2bdf8214..9f639dbf 100644 --- a/torch2trt/converters/normalize.py +++ b/torch2trt/converters/normalize.py @@ -14,7 +14,8 @@ def convert_normalize(ctx): output = ctx.method_return # add broadcastable scalar constants to network - input_trt, eps_trt, p_trt, p_inv_trt = trt_(ctx.network, input, eps, p, 1.0 / p) + input_trt, eps_trt, p_trt, p_inv_trt = add_missing_trt_tensors(ctx.network, [input, eps, p, 1.0 / p]) + input_trt, eps_trt, p_trt, p_inv_trt = broadcast_trt_tensors(ctx.network, [input_trt, eps_trt, p_trt, p_inv_trt], len(input_trt.shape)) # compute norm = sum(abs(x)**p, dim=dim)**(1./p) norm = ctx.network.add_unary(input_trt, trt.UnaryOperation.ABS).get_output(0) diff --git a/torch2trt/converters/pad.py b/torch2trt/converters/pad.py index 51df5526..7fa9ba88 100644 --- a/torch2trt/converters/pad.py +++ b/torch2trt/converters/pad.py @@ -5,7 +5,7 @@ @tensorrt_converter('torch.nn.functional.pad') def convert_pad(ctx): input = ctx.method_args[0] - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return pad = ctx.method_args[1] diff --git a/torch2trt/converters/permute.py b/torch2trt/converters/permute.py index 7ef6fdf8..dc2355d3 100644 --- a/torch2trt/converters/permute.py +++ b/torch2trt/converters/permute.py @@ -5,7 +5,7 @@ @tensorrt_converter('torch.Tensor.permute') def convert_permute(ctx): input = ctx.method_args[0] - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return # permutation -1 because TRT does not include batch dim diff --git a/torch2trt/converters/pow.py b/torch2trt/converters/pow.py index 4b743b48..3949b615 100644 --- a/torch2trt/converters/pow.py +++ b/torch2trt/converters/pow.py @@ -8,8 +8,9 @@ def convert_pow(ctx): input_a = ctx.method_args[0] input_b = ctx.method_args[1] - input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b) output = ctx.method_return + input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) + input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], output.ndim - 1) layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.POW) output._trt = layer.get_output(0) @@ -18,8 +19,9 @@ def convert_pow(ctx): def convert_pow(ctx): input_a = 
ctx.method_args[1] input_b = ctx.method_args[0] # flipped for rpow - input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b) output = ctx.method_return + input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) + input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], output.ndim - 1) layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.POW) output._trt = layer.get_output(0) diff --git a/torch2trt/converters/prelu.py b/torch2trt/converters/prelu.py index c2c4ca6f..256ae13c 100644 --- a/torch2trt/converters/prelu.py +++ b/torch2trt/converters/prelu.py @@ -11,7 +11,7 @@ def convert_prelu(ctx): weight_shape = [1] * (len(input.shape) - 1) weight_shape[0] = weight.numel() - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] # y = prelu(x) = relu(x) - alpha * relu(-x) diff --git a/torch2trt/converters/prod.py b/torch2trt/converters/prod.py index 185cdf22..7f5afbc6 100644 --- a/torch2trt/converters/prod.py +++ b/torch2trt/converters/prod.py @@ -9,7 +9,7 @@ def convert_prod(ctx): input = ctx.method_args[0] dim = get_arg(ctx, 'dim', pos=1, default=tuple(range(1, input.ndim))) keepdim = get_arg(ctx, 'keepdim', pos=2, default=False) - input_trt= trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return layer = ctx.network.add_reduce(input_trt, trt.ReduceOperation.PROD, torch_dim_to_trt_axes(dim), keepdim) output._trt = layer.get_output(0) diff --git a/torch2trt/converters/sigmoid.py b/torch2trt/converters/sigmoid.py index 2443cf74..795ce415 100644 --- a/torch2trt/converters/sigmoid.py +++ b/torch2trt/converters/sigmoid.py @@ -6,7 +6,7 @@ @tensorrt_converter('torch.sigmoid') def convert_sigmoid(ctx): input = ctx.method_args[0] - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return layer = ctx.network.add_activation(input_trt, trt.ActivationType.SIGMOID) diff --git a/torch2trt/converters/softmax.py b/torch2trt/converters/softmax.py index 1b9d8441..2b98f67b 100644 --- a/torch2trt/converters/softmax.py +++ b/torch2trt/converters/softmax.py @@ -5,7 +5,7 @@ @tensorrt_converter('torch.nn.functional.softmax') def convert_softmax(ctx): input = ctx.method_args[0] - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return # get dims from args or kwargs diff --git a/torch2trt/converters/split.py b/torch2trt/converters/split.py index 23f964bd..f3ee135c 100644 --- a/torch2trt/converters/split.py +++ b/torch2trt/converters/split.py @@ -6,7 +6,7 @@ @tensorrt_converter('torch.Tensor.split') def convert_split(ctx): input = get_arg(ctx, 'input', 0, None) - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] # we don't need to parse split/chunk (arg 1) # since we infer size from output tensors dim = get_arg(ctx, 'dim', 2, 0) diff --git a/torch2trt/converters/sub.py b/torch2trt/converters/sub.py index 848fd1aa..d110c654 100644 --- a/torch2trt/converters/sub.py +++ b/torch2trt/converters/sub.py @@ -8,8 +8,9 @@ def convert_sub(ctx): input_a = ctx.method_args[0] input_b = ctx.method_args[1] - input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b) output = ctx.method_return + input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) + input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, 
[input_a_trt, input_b_trt], output.ndim - 1) layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.SUB) output._trt = layer.get_output(0) @@ -18,8 +19,9 @@ def convert_sub(ctx): def convert_sub(ctx): input_a = ctx.method_args[1] input_b = ctx.method_args[0] # flipped for rsub - input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b) output = ctx.method_return + input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) + input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], output.ndim - 1) layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.SUB) output._trt = layer.get_output(0) @@ -86,4 +88,31 @@ def forward(self, x): @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) def test_rsub_float(): - return RSubFloat() \ No newline at end of file + return RSubFloat() + +class SubConstantNoBatch(torch.nn.Module): + def __init__(self): + super(SubConstantNoBatch, self).__init__() + self.register_buffer('y', torch.ones((3, 10, 10))) + + def forward(self, x): + return x - self.y + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10, 10)]) +def test_sub_constant_nobatch(): + return SubConstantNoBatch() + + +class SubConstantBatch(torch.nn.Module): + def __init__(self): + super(SubConstantBatch, self).__init__() + self.register_buffer('y', torch.ones((1, 3, 10, 10))) + + def forward(self, x): + return x - self.y + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10, 10)]) +def test_sub_constant_batch(): + return SubConstantBatch() \ No newline at end of file diff --git a/torch2trt/converters/sum.py b/torch2trt/converters/sum.py index 52f21eb0..7b975c71 100644 --- a/torch2trt/converters/sum.py +++ b/torch2trt/converters/sum.py @@ -9,7 +9,7 @@ def convert_sum(ctx): input = ctx.method_args[0] dim = get_arg(ctx, 'dim', pos=1, default=tuple(range(1, input.ndim))) keepdim = get_arg(ctx, 'keepdim', pos=2, default=False) - input_trt= trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return layer = ctx.network.add_reduce(input_trt, trt.ReduceOperation.SUM, torch_dim_to_trt_axes(dim), keepdim) output._trt = layer.get_output(0) @@ -44,7 +44,7 @@ def __init__(self, maxdisp): self.register_buffer('disp', torch.arange(maxdisp, dtype=torch.float32).view(maxdisp, 1, 1)) def forward(self, x): - return torch.sum(x * self.disp, 1) + return x * self.disp#, 1) @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3)]) @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 23, 23)]) diff --git a/torch2trt/converters/tanh.py b/torch2trt/converters/tanh.py index f6ec83fe..00ca37b1 100644 --- a/torch2trt/converters/tanh.py +++ b/torch2trt/converters/tanh.py @@ -6,7 +6,7 @@ @tensorrt_converter('torch.tanh') def convert_tanh(ctx): input = ctx.method_args[0] - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return layer = ctx.network.add_activation(input_trt, trt.ActivationType.TANH) diff --git a/torch2trt/converters/transpose.py b/torch2trt/converters/transpose.py index 4f0f28f4..c5131592 100644 --- a/torch2trt/converters/transpose.py +++ b/torch2trt/converters/transpose.py @@ -5,7 +5,7 @@ @tensorrt_converter("torch.transpose", enabled=trt_version() < '7.0') def convert_transpose(ctx): input = ctx.method_args[0] - input_trt = trt_(ctx.network, input) + input_trt = 
add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return # permutation -1 because TRT does not include batch dim permutation = list(range(len(input.shape) - 1)) @@ -21,7 +21,7 @@ def convert_transpose(ctx): @tensorrt_converter('torch.transpose', enabled=trt_version() >= '7.0') def convert_transpose_trt7(ctx): input = ctx.method_args[0] - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return # permutation -1 because TRT does not include batch dim permutation = list(range(len(input.shape) - 1)) diff --git a/torch2trt/converters/unary.py b/torch2trt/converters/unary.py index 67b64b4c..6f80f670 100644 --- a/torch2trt/converters/unary.py +++ b/torch2trt/converters/unary.py @@ -4,7 +4,7 @@ def __convert_unary(ctx, op): input = get_arg(ctx, 'input', pos=0, default=None) - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return layer = ctx.network.add_unary(input_trt, op) output._trt = layer.get_output(0) diff --git a/torch2trt/converters/view.py b/torch2trt/converters/view.py index 5d674f8b..2608ed21 100644 --- a/torch2trt/converters/view.py +++ b/torch2trt/converters/view.py @@ -11,7 +11,7 @@ @tensorrt_converter('torch.unsqueeze') def convert_view(ctx): input = ctx.method_args[0] - input_trt = trt_(ctx.network, input) + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return layer = ctx.network.add_shuffle(input_trt) layer.reshape_dims = tuple(output.shape[1:]) diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index 25b4fa67..2cc1f2cf 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -127,28 +127,30 @@ def add_missing_trt_tensors(network, tensors): # GET TRT TENSOR (OR CREATE TRT CONSTANT) # get tensor w/ _trt - if isinstance(t, torch.Tensor) and hasattr(t, "_trt"): + # or... add constant for scalar primitive + if isinstance(t, float) or isinstance(t, int): + shape = (1,) + scalar = t * torch.ones(shape, dtype=dtype).cpu().numpy() + trt_tensor = network.add_constant(shape, scalar).get_output(0) + elif hasattr(t, "_trt"): trt_tensor = t._trt # or... add constant for leaf tensor w/o _trt - elif isinstance(t, torch.Tensor) and not hasattr(t, "_trt"): + else: # remove all preceding ones, these can be re-inserted later when broadcasting num_preceding_ones = 0 for j in range(t.ndim): if int(t.shape[j]) == 1: num_preceding_ones += 1 + else: + break shape = tuple(t.shape[num_preceding_ones:]) weight = t.detach().cpu().numpy() t._trt = network.add_constant(shape, weight).get_output(0) trt_tensor = t._trt - # or... 
add constant for scalar primitive - elif isinstance(t, float) or isinstance(t, int): - shape = (1,) - scalar = t * torch.ones(shape, dtype=dtype).cpu().numpy() - trt_tensor = network.add_constant(shape, scalar).get_output(0) assert trt_tensor is not None From 742b3fd96e8f54274a68a06970d39937565a9cbf Mon Sep 17 00:00:00 2001 From: John Welsh Date: Wed, 2 Sep 2020 07:55:15 +0000 Subject: [PATCH 302/355] dump converters script --- scripts/dump_converters.py | 43 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 scripts/dump_converters.py diff --git a/scripts/dump_converters.py b/scripts/dump_converters.py new file mode 100644 index 00000000..ba271d7d --- /dev/null +++ b/scripts/dump_converters.py @@ -0,0 +1,43 @@ +import argparse +import sys +import subprocess +import os +from importlib.machinery import SourceFileLoader + +torch2trt = SourceFileLoader("torch2trt", "torch2trt/__init__.py").load_module() # to load relative to root + +if __name__ == '__main__': + + parser = argparse.ArgumentParser() + parser.add_argument('--github', + type=str, + default='https://github.com/NVIDIA-AI-IOT/torch2trt') + args = parser.parse_args() + + print('| Method | Converter |') + print('|--------|-----------|') + + for method, entry in torch2trt.CONVERTERS.items(): + + if not entry['is_real']: + continue + + converter = entry['converter'] + + # get commit hash + p = subprocess.Popen(['git', 'rev-parse', 'HEAD'], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + commit, err = p.communicate() + commit = commit.decode('utf-8').strip('\n') + + # get github URL + url = '{github}/blob/{commit}/{relpath}#L{lineno}'.format( + github=args.github, + commit=str(commit), + relpath=os.path.relpath(converter.__code__.co_filename, + os.path.abspath('.')), + lineno=converter.__code__.co_firstlineno) + + print('| {method} | [{converter}]({url}) |'.format( + method=method, converter=converter.__name__, url=url)) From 765c3ad9221c9bc8b615b549b0b1d12eb9fff17e Mon Sep 17 00:00:00 2001 From: John Welsh Date: Wed, 2 Sep 2020 09:24:12 +0000 Subject: [PATCH 303/355] docs --- .gitignore | 1 + docs/benchmarks/jetson_nano.md | 22 ++++++++ docs/benchmarks/jetson_xavier.md | 33 ++++++++++++ docs/css/version-select.css | 5 ++ docs/getting_started.md | 84 +++++++++++++++++++++++++++++ docs/images/chart.svg | 1 + docs/index.md | 12 +++++ docs/js/version-select.js | 49 +++++++++++++++++ docs/usage/basic_usage.md | 1 + docs/usage/batch_size.md | 1 + docs/usage/cpp_usage.md | 2 + docs/usage/custom_converter.md | 1 + docs/usage/network_visualization.md | 1 + docs/usage/profiling.md | 1 + docs/usage/reduced_precision.md | 1 + mkdocs.yml | 57 ++++++++++++++++++++ scripts/build_docs.sh | 7 +++ scripts/dump_converters.py | 33 ++++++++---- scripts/push_docs.sh | 7 +++ scripts/test_docs.sh | 6 +++ 20 files changed, 314 insertions(+), 11 deletions(-) create mode 100644 docs/benchmarks/jetson_nano.md create mode 100644 docs/benchmarks/jetson_xavier.md create mode 100644 docs/css/version-select.css create mode 100644 docs/getting_started.md create mode 100644 docs/images/chart.svg create mode 100644 docs/index.md create mode 100644 docs/js/version-select.js create mode 100644 docs/usage/basic_usage.md create mode 100644 docs/usage/batch_size.md create mode 100644 docs/usage/cpp_usage.md create mode 100644 docs/usage/custom_converter.md create mode 100644 docs/usage/network_visualization.md create mode 100644 docs/usage/profiling.md create mode 100644 docs/usage/reduced_precision.md create mode 100644 mkdocs.yml 
create mode 100755 scripts/build_docs.sh
 create mode 100755 scripts/push_docs.sh
 create mode 100755 scripts/test_docs.sh

diff --git a/.gitignore b/.gitignore
index 172017dc..5b2ee2e5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,3 +15,4 @@ __pycache__/
 *.pyc
 *.ipynb_checkpoints
 *.pth
+docs/converters.md
diff --git a/docs/benchmarks/jetson_nano.md b/docs/benchmarks/jetson_nano.md
new file mode 100644
index 00000000..f58c9a3c
--- /dev/null
+++ b/docs/benchmarks/jetson_nano.md
@@ -0,0 +1,24 @@
+# Jetson Nano
+
+| Name | Data Type | Input Shapes | torch2trt kwargs | Max Error | Throughput (PyTorch, FPS) | Throughput (TensorRT, FPS) | Latency (PyTorch, ms) | Latency (TensorRT, ms) |
+|------|-----------|--------------|------------------|-----------|----------------------|-----------------------|-------------------|--------------------|
+| torchvision.models.alexnet.alexnet | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.29E-05 | 46.4 | 69.9 | 22.1 | 14.7 |
+| torchvision.models.squeezenet.squeezenet1_0 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.20E-02 | 44 | 137 | 24.2 | 7.6 |
+| torchvision.models.squeezenet.squeezenet1_1 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 9.77E-04 | 76.6 | 248 | 14 | 4.34 |
+| torchvision.models.resnet.resnet18 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 5.86E-03 | 29.4 | 90.2 | 34.7 | 11.4 |
+| torchvision.models.resnet.resnet34 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.56E-01 | 15.5 | 50.7 | 64.8 | 20.2 |
+| torchvision.models.resnet.resnet50 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 6.45E-02 | 12.4 | 34.2 | 81.7 | 29.8 |
+| torchvision.models.resnet.resnet101 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.01E+03 | 7.18 | 19.9 | 141 | 51.1 |
+| torchvision.models.resnet.resnet152 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 4.96 | 14.1 | 204 | 72.3 |
+| torchvision.models.densenet.densenet121 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.42E-03 | 11.5 | 41.9 | 84.5 | 24.8 |
+| torchvision.models.densenet.densenet169 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 5.86E-03 | 8.25 | 33.2 | 118 | 31.2 |
+| torchvision.models.densenet.densenet201 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.42E-03 | 6.84 | 25.4 | 141 | 40.8 |
+| torchvision.models.densenet.densenet161 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 4.15E-03 | 4.71 | 15.6 | 247 | 65.8 |
+| torchvision.models.vgg.vgg11 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.51E-04 | 8.9 | 18.3 | 114 | 55.1 |
+| torchvision.models.vgg.vgg13 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.07E-04 | 6.53 | 14.7 | 156 | 68.7 |
+| torchvision.models.vgg.vgg16 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 4.58E-04 | 5.09 | 11.9 | 201 | 85.1 |
+| torchvision.models.vgg.vgg11_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.81E-04 | 8.74 | 18.4 | 117 | 54.8 |
+| torchvision.models.vgg.vgg13_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 5.19E-04 | 6.31 | 14.8 | 162 | 68.5 |
+| torchvision.models.vgg.vgg16_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 9.77E-04 | 4.96 | 12 | 207 | 84.3 |
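+
+Throughput and latency were generated with the ``torch2trt.test`` benchmarking module: throughput is the number of model executions per second (FPS), and latency is the mean wall-clock time per execution in milliseconds.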
diff --git a/docs/benchmarks/jetson_xavier.md b/docs/benchmarks/jetson_xavier.md
new file mode 100644
index 00000000..1c3cb2c6
--- /dev/null
+++ b/docs/benchmarks/jetson_xavier.md
@@ -0,0 +1,33 @@
+# Jetson Xavier
+
+| Name | Data Type | Input Shapes | torch2trt kwargs | Max Error | Throughput (PyTorch, FPS) | Throughput (TensorRT, FPS) | Latency (PyTorch, ms) | Latency (TensorRT, ms) |
+|------|-----------|--------------|------------------|-----------|----------------------|-----------------------|-------------------|--------------------|
+| torch2trt.tests.torchvision.classification.alexnet | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 7.63E-05 | 251 | 565 | 4.96 | 2.02 |
+| torch2trt.tests.torchvision.classification.squeezenet1_0 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 9.77E-04 | 121 | 834 | 8.04 | 1.49 |
+| torch2trt.tests.torchvision.classification.squeezenet1_1 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 9.77E-04 | 125 | 1.29e+03 | 8.01 | 1.02 |
+| torch2trt.tests.torchvision.classification.resnet18 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 9.77E-03 | 136 | 722 | 7.33 | 1.64 |
+| torch2trt.tests.torchvision.classification.resnet34 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.50E-01 | 77.8 | 396 | 12.9 | 2.79 |
+| torch2trt.tests.torchvision.classification.resnet50 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.09E-01 | 55.8 | 326 | 17.9 | 3.37 |
+| torch2trt.tests.torchvision.classification.resnet101 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 28.3 | 175 | 35.1 | 6.04 |
+| torch2trt.tests.torchvision.classification.resnet152 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 18.8 | 122 | 53.2 | 8.57 |
+| torch2trt.tests.torchvision.classification.densenet121 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 7.81E-03 | 20.9 | 76.6 | 47.5 | 13 |
+| torch2trt.tests.torchvision.classification.densenet169 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.91E-03 | 14.8 | 41.7 | 66.7 | 23.7 |
+| torch2trt.tests.torchvision.classification.densenet201 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 4.88E-03 | 12.6 | 30.2 | 79.1 | 33 |
+| torch2trt.tests.torchvision.classification.densenet161 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 4.88E-03 | 16.1 | 43.7 | 62.1 | 23 |
+| torch2trt.tests.torchvision.classification.vgg11 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.56E-03 | 84.8 | 201 | 12.1 | 5.24 |
+| torch2trt.tests.torchvision.classification.vgg13 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.24E-03 | 71.1 | 165 | 14.3 | 6.34 |
+| torch2trt.tests.torchvision.classification.vgg16 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.78E-03 | 61.5 | 139 | 16.5 | 7.46 |
+| torch2trt.tests.torchvision.classification.vgg19 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.81E-03 | 54.1 | 120 | 18.7 | 8.61 |
+| torch2trt.tests.torchvision.classification.vgg11_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.20E-03 | 81.5 | 200 | 12.5 | 5.27 |
+| torch2trt.tests.torchvision.classification.vgg13_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.71E-03 | 67.5 | 165 | 15.1 | 6.33 |
+| torch2trt.tests.torchvision.classification.vgg16_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.87E-03 | 58.3 | 139 | 17.4 | 7.48 |
+| torch2trt.tests.torchvision.classification.vgg19_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.44E-03 | 51.4 | 120 | 19.7 | 8.61 |
+| torch2trt.tests.torchvision.classification.mobilenet_v2 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 64.8 | 723 | 15.4 | 1.67 |
+| torch2trt.tests.torchvision.classification.shufflenet_v2_x0_5 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.53E-05 | 51.2 | 463 | 19.4 | 2.17 |
+| torch2trt.tests.torchvision.classification.shufflenet_v2_x1_0 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.53E-05 | 49.4 | 419 | 20.4 | 2.43 |
+| torch2trt.tests.torchvision.classification.shufflenet_v2_x1_5 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.53E-05 | 51.4 | 426 | 19.6 | 2.37 |
+| torch2trt.tests.torchvision.classification.shufflenet_v2_x2_0 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.53E-05 | 48.2 | 419 | 20.8 | 2.48 |
+| torch2trt.tests.torchvision.classification.mnasnet0_5 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.03E-06 | 67.8 | 883 | 14.9 | 1.4 |
+| torch2trt.tests.torchvision.classification.mnasnet0_75 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 67.6 | 751 | 14.8 | 1.6 |
+| torch2trt.tests.torchvision.classification.mnasnet1_0 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 65.7 | 667 | 15.2 | 1.77 |
+| torch2trt.tests.torchvision.classification.mnasnet1_3 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 67.4 | 573 | 15 | 2.02 |
diff --git a/docs/css/version-select.css b/docs/css/version-select.css
new file mode 100644
index 00000000..49079bf4
--- /dev/null
+++ b/docs/css/version-select.css
@@ -0,0 +1,5 @@
+@media only screen and (max-width:76.1875em) {
+  #version-selector {
+    padding: .6rem .8rem;
+  }
+}
diff --git a/docs/getting_started.md b/docs/getting_started.md
new file mode 100644
index 00000000..30adec06
--- /dev/null
+++ b/docs/getting_started.md
@@ -0,0 +1,85 @@
+# Getting Started
+
+Follow these steps to get started using torch2trt.
+
+## Installation
+
+!!! note
+
+    torch2trt depends on the TensorRT Python API. On Jetson, this is included with the latest JetPack. For desktop, please follow the [TensorRT Installation Guide](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html). You may also try installing torch2trt inside one of the NGC PyTorch docker containers for [Desktop](https://ngc.nvidia.com/catalog/containers/nvidia:pytorch) or [Jetson](https://ngc.nvidia.com/catalog/containers/nvidia:l4t-pytorch).
+
+### Option 1 - Without plugins
+
+To install without compiling plugins, call the following
+
+```bash
+git clone https://github.com/NVIDIA-AI-IOT/torch2trt
+cd torch2trt
+python setup.py install
+```
+
+### Option 2 - With plugins (experimental)
+
+To install with plugins to support some operations in PyTorch that are not natively supported with TensorRT, call the following
+
+!!! note
+
+    Please note, this currently only includes the interpolate plugin. This plugin requires PyTorch 1.3+ for serialization.
+
+```bash
+git clone https://github.com/NVIDIA-AI-IOT/torch2trt
+cd torch2trt
+sudo python setup.py install --plugins
+```
+
+## Basic Usage
+
+Below are some usage examples; for more, check out the [usage](usage) guide.
+
+### Convert
+
+```python
+import torch
+from torch2trt import torch2trt
+from torchvision.models.alexnet import alexnet
+
+# create some regular pytorch model...
+model = alexnet(pretrained=True).eval().cuda()
+
+# create example data
+x = torch.ones((1, 3, 224, 224)).cuda()
+
+# convert to TensorRT feeding sample data as input
+model_trt = torch2trt(model, [x])
+```
+
+### Execute
+
+We can execute the returned ``TRTModule`` just like the original PyTorch model
+
+```python
+y = model(x)
+y_trt = model_trt(x)
+
+# check the output against PyTorch
+print(torch.max(torch.abs(y - y_trt)))
+```
+
+### Save and load
+
+We can save the model as a ``state_dict``.
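+The ``state_dict`` holds the serialized TensorRT engine together with the names of its input and output bindings, so the saved model can be reloaded and executed without repeating the conversion.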
+ +```python +torch.save(model_trt.state_dict(), 'alexnet_trt.pth') +``` + +We can load the saved model into a ``TRTModule`` + +```python +from torch2trt import TRTModule + +model_trt = TRTModule() + +model_trt.load_state_dict(torch.load('alexnet_trt.pth')) +``` \ No newline at end of file diff --git a/docs/images/chart.svg b/docs/images/chart.svg new file mode 100644 index 00000000..28f0b295 --- /dev/null +++ b/docs/images/chart.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 00000000..91f8eca8 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,12 @@ +# torch2trt + + + +torch2trt is a PyTorch to TensorRT converter which utilizes the +TensorRT Python API. The converter is + +* Easy to use - Convert modules with a single function call ``torch2trt`` + +* Easy to extend - Write your own layer converter in Python and register it with ``@tensorrt_converter`` + +If you find an issue, please [let us know](https://github.com/NVIDIA-AI-IOT/torch2trt)! \ No newline at end of file diff --git a/docs/js/version-select.js b/docs/js/version-select.js new file mode 100644 index 00000000..794b5cc8 --- /dev/null +++ b/docs/js/version-select.js @@ -0,0 +1,49 @@ +window.addEventListener("DOMContentLoaded", function() { + // This is a bit hacky. Figure out the base URL from a known CSS file the + // template refers to... + var ex = new RegExp("/?css/version-select.css$"); + var sheet = document.querySelector('link[href$="version-select.css"]'); + + var ABS_BASE_URL = sheet.href.replace(ex, ""); + var CURRENT_VERSION = ABS_BASE_URL.split("/").pop(); + + function makeSelect(options, selected) { + var select = document.createElement("select"); + select.classList.add("form-control"); + + options.forEach(function(i) { + var option = new Option(i.text, i.value, undefined, + i.value === selected); + select.add(option); + }); + + return select; + } + + var xhr = new XMLHttpRequest(); + xhr.open("GET", ABS_BASE_URL + "/../versions.json"); + xhr.onload = function() { + var versions = JSON.parse(this.responseText); + + var realVersion = versions.find(function(i) { + return i.version === CURRENT_VERSION || + i.aliases.includes(CURRENT_VERSION); + }).version; + + var select = makeSelect(versions.map(function(i) { + return {text: i.title, value: i.version}; + }), realVersion); + select.addEventListener("change", function(event) { + window.location.href = ABS_BASE_URL + "/../" + this.value; + }); + + var container = document.createElement("div"); + container.id = "version-selector"; + container.className = "md-nav__item"; + container.appendChild(select); + + var sidebar = document.querySelector(".md-nav--primary > .md-nav__list"); + sidebar.parentNode.insertBefore(container, sidebar); + }; + xhr.send(); +}); diff --git a/docs/usage/basic_usage.md b/docs/usage/basic_usage.md new file mode 100644 index 00000000..f2abed1c --- /dev/null +++ b/docs/usage/basic_usage.md @@ -0,0 +1 @@ +# Basic Usage \ No newline at end of file diff --git a/docs/usage/batch_size.md b/docs/usage/batch_size.md new file mode 100644 index 00000000..fabdff82 --- /dev/null +++ b/docs/usage/batch_size.md @@ -0,0 +1 @@ +# Batch Size \ No newline at end of file diff --git a/docs/usage/cpp_usage.md b/docs/usage/cpp_usage.md new file mode 100644 index 00000000..892e28a6 --- /dev/null +++ b/docs/usage/cpp_usage.md @@ -0,0 +1,2 @@ +# Cpp Usage + diff --git a/docs/usage/custom_converter.md b/docs/usage/custom_converter.md new file mode 100644 index 00000000..2a1883f8 --- /dev/null +++ 
b/docs/usage/custom_converter.md @@ -0,0 +1 @@ +# Custom Converter \ No newline at end of file diff --git a/docs/usage/network_visualization.md b/docs/usage/network_visualization.md new file mode 100644 index 00000000..3d635a47 --- /dev/null +++ b/docs/usage/network_visualization.md @@ -0,0 +1 @@ +# Network Visualization \ No newline at end of file diff --git a/docs/usage/profiling.md b/docs/usage/profiling.md new file mode 100644 index 00000000..f6c15c4d --- /dev/null +++ b/docs/usage/profiling.md @@ -0,0 +1 @@ +# Profiling \ No newline at end of file diff --git a/docs/usage/reduced_precision.md b/docs/usage/reduced_precision.md new file mode 100644 index 00000000..1b31594f --- /dev/null +++ b/docs/usage/reduced_precision.md @@ -0,0 +1 @@ +# Reduced Precision \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 00000000..78e99d68 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,57 @@ +site_name: torch2trt +theme: + name: "material" + palette: + primary: green + secondary: light green + +repo_url: https://github.com/NVIDIA-AI-IOT/torch2trt + +plugins: + - search + +use_directory_urls: False + +edit_uri: blob/master +markdown_extensions: + - pymdownx.tabbed + - pymdownx.keys + - pymdownx.snippets + - pymdownx.inlinehilite + - pymdownx.highlight: + use_pygments: true + - admonition + - pymdownx.details + - pymdownx.superfences + - attr_list + +# use_directory_urls - False to fix broken raw html image links +# https://github.com/mkdocs/mkdocs/issues/991 + + +nav: + + - Home: index.md + - Getting Started: getting_started.md + - Usage: + - Basic Usage: usage/basic_usage.md + - Reduced Precision: usage/reduced_precision.md + - Batch Size: usage/batch_size.md + - Cpp Usage: usage/cpp_usage.md + - Custom Converter: usage/custom_converter.md + - Profiling: usage/profiling.md + - Network Visualization: usage/network_visualization.md + - Converters: converters.md + - Benchmarks: + - Jetson Nano: benchmarks/jetson_nano.md + - Jetson Xavier: benchmarks/jetson_xavier.md + +extra_css: + - css/version-select.css +extra_javascript: + - js/version-select.js + +google_analytics: + - UA-135919510-3 + - auto + \ No newline at end of file diff --git a/scripts/build_docs.sh b/scripts/build_docs.sh new file mode 100755 index 00000000..a1883671 --- /dev/null +++ b/scripts/build_docs.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +TAG=$1 + +# python3 scripts/dump_converters.py --tag=$TAG > docs/converters.md + +mike deploy $TAG \ No newline at end of file diff --git a/scripts/dump_converters.py b/scripts/dump_converters.py index ba271d7d..cd01465b 100644 --- a/scripts/dump_converters.py +++ b/scripts/dump_converters.py @@ -6,17 +6,28 @@ torch2trt = SourceFileLoader("torch2trt", "torch2trt/__init__.py").load_module() # to load relative to root +HEADER = """ +# Converters + +This table contains a list of supported PyTorch methods and their associated converters. + +If your model is not converting, a good start in debugging would be to see if it contains a method not listed +in this table. You may also find these a useful reference when writing your own converters. 
+ +| Method | Converter | +|--------|-----------|""" + if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--github', type=str, default='https://github.com/NVIDIA-AI-IOT/torch2trt') + parser.add_argument('--tag', type=str, default='master') args = parser.parse_args() - - print('| Method | Converter |') - print('|--------|-----------|') - + + print(HEADER) + for method, entry in torch2trt.CONVERTERS.items(): if not entry['is_real']: @@ -25,19 +36,19 @@ converter = entry['converter'] # get commit hash - p = subprocess.Popen(['git', 'rev-parse', 'HEAD'], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - commit, err = p.communicate() - commit = commit.decode('utf-8').strip('\n') +# p = subprocess.Popen(['git', 'rev-parse', 'HEAD'], +# stdout=subprocess.PIPE, +# stderr=subprocess.PIPE) +# commit, err = p.communicate() +# commit = commit.decode('utf-8').strip('\n') # get github URL url = '{github}/blob/{commit}/{relpath}#L{lineno}'.format( github=args.github, - commit=str(commit), + commit=args.tag, relpath=os.path.relpath(converter.__code__.co_filename, os.path.abspath('.')), lineno=converter.__code__.co_firstlineno) - print('| {method} | [{converter}]({url}) |'.format( + print('| ``{method}`` | [``{converter}``]({url}) |'.format( method=method, converter=converter.__name__, url=url)) diff --git a/scripts/push_docs.sh b/scripts/push_docs.sh new file mode 100755 index 00000000..9d5fc870 --- /dev/null +++ b/scripts/push_docs.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +TAG=$1 + +python3 scripts/dump_converters.py > docs/converters.md + +mike deploy $TAG --push diff --git a/scripts/test_docs.sh b/scripts/test_docs.sh new file mode 100755 index 00000000..b9be6305 --- /dev/null +++ b/scripts/test_docs.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +TAG=$1 + +mike set-default $TAG +mike serve --dev-addr=0.0.0.0:8000 \ No newline at end of file From 3adbbe131f5e3079533e38dbdb37314900eccdf9 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Wed, 2 Sep 2020 09:27:15 +0000 Subject: [PATCH 304/355] changelog contributing --- CHANGELOG.md | 1 + CONTRIBUTING.md | 1 + 2 files changed, 2 insertions(+) create mode 100644 CHANGELOG.md create mode 100644 CONTRIBUTING.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..5ddad421 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1 @@ +# Changelog \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..4d218d9f --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1 @@ +# Contributing \ No newline at end of file From 3beeae2bb8ed986b0c00e390d54f869c69f2ed09 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Wed, 2 Sep 2020 10:06:45 +0000 Subject: [PATCH 305/355] docs --- CHANGELOG.md | 2 +- CONTRIBUTING.md | 52 ++++++++++++++++++++++++++++- docs/CHANGELOG.md | 1 + docs/CONTRIBUTING.md | 1 + docs/usage/debugging.md | 1 + docs/usage/network_visualization.md | 1 - mkdocs.yml | 4 ++- 7 files changed, 58 insertions(+), 4 deletions(-) create mode 120000 docs/CHANGELOG.md create mode 120000 docs/CONTRIBUTING.md create mode 100644 docs/usage/debugging.md delete mode 100644 docs/usage/network_visualization.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ddad421..252d4483 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1 +1 @@ -# Changelog \ No newline at end of file +# Changes \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4d218d9f..cadcb869 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1 +1,51 @@ -# Contributing \ No newline at end of file +# Contributing 
+
+## Submit an Issue
+
+torch2trt is use case driven. We originally created it to solve
+use cases related to NVIDIA Jetson, but the layer support has grown
+largely since its release and we've found that it has
+helped many other developers as well.
+
+The growth of torch2trt has been largely driven by issues submitted on [GitHub](https://github.com/NVIDIA-AI-IOT/torch2trt/issues).
+We learn a lot from the reported issues. Submitting an issue it is one of the best ways to begin contributing to torch2trt.
+
+The reported issues are typically are one of the following,
+
+* A bug or unexpected result
+* A model with unsupported layers
+
+If you report an issue, we typically find the following information helpful
+
+* PyTorch version
+* TensorRT version
+* Platform (ie: Jetson Nano)
+* The PyTorch Module you're attempting to convert
+* The steps taken to convert the PyTorch module
+
+If you're not sure how to provide any of these pieces of information, don't worry. Just open the pull request
+and we're happy to discuss and help work out the details.
+
+## Ask a Question
+
+Another great way to contribute is to ask a question on [GitHub](https://github.com/NVIDIA-AI-IOT/torch2trt/issues).
+There are often other developers who share your question, and they may find the discussion helpful. This also
+helps us gauge feature interest and identify gaps in documentation.
+
+
+## Submit a Pull Request
+
+torch2trt is use case driven and has limited maintenance, for this reason we value community contributions greatly.
+Another great way to contribute is by submitting a pull request. Pull requests which are most likely to be accepted are
+
+* A new converter
+* A test case
+* A bug fix
+
+If you add a new converter, it is best to include a few test
+cases that cross validate the converter against the original PyTorch. We provide a utility function to do this,
+as described in the [Custom Converter](usage/custom_converter.md) usage guide.
+
+Ideally pull requests solve one thing at a time. This makes it easy
+to evaluate the impact that the changes have on the project step-by-step. The more confident we are that
+the changes will not adversely impact the experience of other developers, the more likely we are to accept them.
\ No newline at end of file diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md new file mode 120000 index 00000000..04c99a55 --- /dev/null +++ b/docs/CHANGELOG.md @@ -0,0 +1 @@ +../CHANGELOG.md \ No newline at end of file diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md new file mode 120000 index 00000000..44fcc634 --- /dev/null +++ b/docs/CONTRIBUTING.md @@ -0,0 +1 @@ +../CONTRIBUTING.md \ No newline at end of file diff --git a/docs/usage/debugging.md b/docs/usage/debugging.md new file mode 100644 index 00000000..94853297 --- /dev/null +++ b/docs/usage/debugging.md @@ -0,0 +1 @@ +# Debugging \ No newline at end of file diff --git a/docs/usage/network_visualization.md b/docs/usage/network_visualization.md deleted file mode 100644 index 3d635a47..00000000 --- a/docs/usage/network_visualization.md +++ /dev/null @@ -1 +0,0 @@ -# Network Visualization \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index 78e99d68..c8534828 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -40,11 +40,13 @@ nav: - Cpp Usage: usage/cpp_usage.md - Custom Converter: usage/custom_converter.md - Profiling: usage/profiling.md - - Network Visualization: usage/network_visualization.md + - Debugging: usage/debugging.md - Converters: converters.md - Benchmarks: - Jetson Nano: benchmarks/jetson_nano.md - Jetson Xavier: benchmarks/jetson_xavier.md + - Contributing: CONTRIBUTING.md + - Changes: CHANGELOG.md extra_css: - css/version-select.css From 422e1e662c7a4c5e3038d8fe651e32fd8bd808d1 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Wed, 2 Sep 2020 10:10:01 +0000 Subject: [PATCH 306/355] typos --- CONTRIBUTING.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cadcb869..cfea8844 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -10,7 +10,7 @@ helped many other developers as well. The growth of torch2trt has been largely driven by issues submitted on [GitHub](https://github.com/NVIDIA-AI-IOT/torch2trt/issues). We learn a lot from the reported issues. Submitting an issue it is one of the best ways to begin contributing to torch2trt. -The reported issues are typically are one of the following, +The reported issues typically are one of the following, * A bug or unexpected result * A model with unsupported layers @@ -23,7 +23,7 @@ If you report an issue, we typically find the following information helpful * The PyTorch Module you're attempting to convert * The steps taken to convert the PyTorch module -If you're not sure how to provide any of these pieces of information, don't worry. Just open the pull request +If you're not sure how to provide any of these pieces of information, don't worry. Just open the issue and we're happy to discuss and help work out the details. ## Ask a Question From 4933faf05cd9e5d9aef2b23e7fe692bfda5fbc06 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Wed, 2 Sep 2020 11:19:46 +0000 Subject: [PATCH 307/355] docs --- docs/see_also.md | 55 ++++++++++++++++++++++++++++++++++++++++++++++++ mkdocs.yml | 1 + 2 files changed, 56 insertions(+) create mode 100644 docs/see_also.md diff --git a/docs/see_also.md b/docs/see_also.md new file mode 100644 index 00000000..064da014 --- /dev/null +++ b/docs/see_also.md @@ -0,0 +1,55 @@ +# See Also + +!!! note + + The state of these converters may change over time. We provide this information here with the hope that it will help shed light on the landscape of tools available for optimizing PyTorch models with TensorRT. 
+    If you find this information helpful or outdated / misleading, please let us know.
+
+In addition to torch2trt, there are other workflows for optimizing your PyTorch model with TensorRT.
+
+The other converters we are aware of are
+
+* [ONNX to TensorRT](https://github.com/onnx/onnx-tensorrt)
+
+!!! tip
+
+    Since the ONNX parser ships with TensorRT, we have included a convenience method for using this
+    workflow with torch2trt. If you want to quickly try the ONNX method using the torch2trt interface, just call ``torch2trt(..., use_onnx=True)``.
+    This will perform conversion on the module by exporting the model using PyTorch's JIT tracer,
+    and parsing with TensorRT's ONNX parser.
+
+* [TRTorch](https://github.com/NVIDIA/TRTorch)
+
+Which one you use depends largely on your use case. The differences often come down to
+
+## Layer support
+
+Modern deep learning frameworks are large, and caveats often arise
+when converting between frameworks using a given workflow. These could include
+limitations in serialization or parsing formats. Or in some instances, it may be possible that
+the layer could be supported, but it has just not been done yet. TRTorch is strong
+in the sense that it will default to the original PyTorch method for layers
+which are not converted to TensorRT. The best way to know
+which conversion method works for you is to try converting your model.
+
+## Feature support
+
+TensorRT is evolving and the conversion workflows may have varying levels
+of feature support. In some instances, you may wish to use the latest features of TensorRT, like dynamic shapes,
+but it is not supported in torch2trt or the interface has not yet been exposed. In this
+instance, we recommend checking to see if it is supported by one of the other workflows. The ONNX
+converter is typically strong in this regard, since the parser is distributed with TensorRT.
+
+!!! note
+
+    If there is a TensorRT feature you wish to see in torch2trt, please let us know. We cannot guarantee this will be done, but it helps us gauge interest.
+
+## Extensibility / Ease of Use
+
+In case none of the converters satisfy your use case, you may find it necessary to adapt
+the converter to fit your needs. This is very intuitive with torch2trt,
+since it is done inline with Python, and there are many [examples](converters) to reference. If you know
+how the original PyTorch method works, and have the TensorRT Python API on hand, it is relatively straightforward to adapt torch2trt to your needs.
+The extensibility is often helpful when you want to implement a converter that is specific to the
+context the layer appears in.
+
diff --git a/mkdocs.yml b/mkdocs.yml
index c8534828..1790baed 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -47,6 +47,7 @@ nav:
     - Jetson Xavier: benchmarks/jetson_xavier.md
   - Contributing: CONTRIBUTING.md
   - Changes: CHANGELOG.md
+  - See Also: see_also.md
 
 extra_css:
   - css/version-select.css
From 3ddd70d7cb7074ed204bd77205ab8f6515e06312 Mon Sep 17 00:00:00 2001
From: "j.chu"
Date: Thu, 3 Sep 2020 04:31:22 +0800
Subject: [PATCH 308/355] expose the batch size of INT8 calibration as a
 parameter, since different sizes may generate different accuracy loss.
--- torch2trt/torch2trt.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index 5d4c8c70..9219cde2 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -392,6 +392,7 @@ def torch2trt(module, int8_mode=False, int8_calib_dataset=None, int8_calib_algorithm=DEFAULT_CALIBRATION_ALGORITHM, + int8_calib_batch_size=1, use_onnx=False): inputs_in = inputs @@ -454,7 +455,7 @@ def torch2trt(module, # @TODO(jwelsh): Should we set batch_size=max_batch_size? Need to investigate memory consumption builder.int8_calibrator = DatasetCalibrator( - inputs, int8_calib_dataset, batch_size=1, algorithm=int8_calib_algorithm + inputs, int8_calib_dataset, batch_size=int8_calib_batch_size, algorithm=int8_calib_algorithm ) engine = builder.build_cuda_engine(network) From 16492db4027009da21a255f24ac99d0b40e2b8fc Mon Sep 17 00:00:00 2001 From: John Welsh Date: Wed, 2 Sep 2020 07:55:15 +0000 Subject: [PATCH 309/355] dump converters script --- scripts/dump_converters.py | 43 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 scripts/dump_converters.py diff --git a/scripts/dump_converters.py b/scripts/dump_converters.py new file mode 100644 index 00000000..ba271d7d --- /dev/null +++ b/scripts/dump_converters.py @@ -0,0 +1,43 @@ +import argparse +import sys +import subprocess +import os +from importlib.machinery import SourceFileLoader + +torch2trt = SourceFileLoader("torch2trt", "torch2trt/__init__.py").load_module() # to load relative to root + +if __name__ == '__main__': + + parser = argparse.ArgumentParser() + parser.add_argument('--github', + type=str, + default='https://github.com/NVIDIA-AI-IOT/torch2trt') + args = parser.parse_args() + + print('| Method | Converter |') + print('|--------|-----------|') + + for method, entry in torch2trt.CONVERTERS.items(): + + if not entry['is_real']: + continue + + converter = entry['converter'] + + # get commit hash + p = subprocess.Popen(['git', 'rev-parse', 'HEAD'], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + commit, err = p.communicate() + commit = commit.decode('utf-8').strip('\n') + + # get github URL + url = '{github}/blob/{commit}/{relpath}#L{lineno}'.format( + github=args.github, + commit=str(commit), + relpath=os.path.relpath(converter.__code__.co_filename, + os.path.abspath('.')), + lineno=converter.__code__.co_firstlineno) + + print('| {method} | [{converter}]({url}) |'.format( + method=method, converter=converter.__name__, url=url)) From 88416f960073e46740684f058e08c539f1b75cbc Mon Sep 17 00:00:00 2001 From: John Welsh Date: Wed, 2 Sep 2020 09:24:12 +0000 Subject: [PATCH 310/355] docs --- .gitignore | 1 + docs/benchmarks/jetson_nano.md | 22 ++++++++ docs/benchmarks/jetson_xavier.md | 33 ++++++++++++ docs/css/version-select.css | 5 ++ docs/getting_started.md | 84 +++++++++++++++++++++++++++++ docs/images/chart.svg | 1 + docs/index.md | 12 +++++ docs/js/version-select.js | 49 +++++++++++++++++ docs/usage/basic_usage.md | 1 + docs/usage/batch_size.md | 1 + docs/usage/cpp_usage.md | 2 + docs/usage/custom_converter.md | 1 + docs/usage/network_visualization.md | 1 + docs/usage/profiling.md | 1 + docs/usage/reduced_precision.md | 1 + mkdocs.yml | 57 ++++++++++++++++++++ scripts/build_docs.sh | 7 +++ scripts/dump_converters.py | 33 ++++++++---- scripts/push_docs.sh | 7 +++ scripts/test_docs.sh | 6 +++ 20 files changed, 314 insertions(+), 11 deletions(-) create mode 100644 docs/benchmarks/jetson_nano.md create mode 100644 
docs/benchmarks/jetson_xavier.md create mode 100644 docs/css/version-select.css create mode 100644 docs/getting_started.md create mode 100644 docs/images/chart.svg create mode 100644 docs/index.md create mode 100644 docs/js/version-select.js create mode 100644 docs/usage/basic_usage.md create mode 100644 docs/usage/batch_size.md create mode 100644 docs/usage/cpp_usage.md create mode 100644 docs/usage/custom_converter.md create mode 100644 docs/usage/network_visualization.md create mode 100644 docs/usage/profiling.md create mode 100644 docs/usage/reduced_precision.md create mode 100644 mkdocs.yml create mode 100755 scripts/build_docs.sh create mode 100755 scripts/push_docs.sh create mode 100755 scripts/test_docs.sh diff --git a/.gitignore b/.gitignore index 172017dc..5b2ee2e5 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,4 @@ __pycache__/ *.pyc *.ipynb_checkpoints *.pth +docs/converters.md diff --git a/docs/benchmarks/jetson_nano.md b/docs/benchmarks/jetson_nano.md new file mode 100644 index 00000000..f58c9a3c --- /dev/null +++ b/docs/benchmarks/jetson_nano.md @@ -0,0 +1,22 @@ +# Jetson Nano + +| Name | Data Type | Input Shapes | torch2trt kwargs | Max Error | Throughput (PyTorch) | Throughput (TensorRT) | Latency (PyTorch) | Latency (TensorRT) | +|------|-----------|--------------|------------------|-----------|----------------------|-----------------------|-------------------|--------------------| +| torchvision.models.alexnet.alexnet | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.29E-05 | 46.4 | 69.9 | 22.1 | 14.7 | +| torchvision.models.squeezenet.squeezenet1_0 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.20E-02 | 44 | 137 | 24.2 | 7.6 | +| torchvision.models.squeezenet.squeezenet1_1 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 9.77E-04 | 76.6 | 248 | 14 | 4.34 | +| torchvision.models.resnet.resnet18 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 5.86E-03 | 29.4 | 90.2 | 34.7 | 11.4 | +| torchvision.models.resnet.resnet34 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.56E-01 | 15.5 | 50.7 | 64.8 | 20.2 | +| torchvision.models.resnet.resnet50 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 6.45E-02 | 12.4 | 34.2 | 81.7 | 29.8 | +| torchvision.models.resnet.resnet101 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.01E+03 | 7.18 | 19.9 | 141 | 51.1 | +| torchvision.models.resnet.resnet152 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 4.96 | 14.1 | 204 | 72.3 | +| torchvision.models.densenet.densenet121 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.42E-03 | 11.5 | 41.9 | 84.5 | 24.8 | +| torchvision.models.densenet.densenet169 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 5.86E-03 | 8.25 | 33.2 | 118 | 31.2 | +| torchvision.models.densenet.densenet201 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.42E-03 | 6.84 | 25.4 | 141 | 40.8 | +| torchvision.models.densenet.densenet161 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 4.15E-03 | 4.71 | 15.6 | 247 | 65.8 | +| torchvision.models.vgg.vgg11 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.51E-04 | 8.9 | 18.3 | 114 | 55.1 | +| torchvision.models.vgg.vgg13 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.07E-04 | 6.53 | 14.7 | 156 | 68.7 | +| torchvision.models.vgg.vgg16 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 4.58E-04 | 5.09 | 11.9 | 201 | 85.1 | +| torchvision.models.vgg.vgg11_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.81E-04 | 8.74 | 18.4 | 117 | 54.8 | +| 
torchvision.models.vgg.vgg13_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 5.19E-04 | 6.31 | 14.8 | 162 | 68.5 | +| torchvision.models.vgg.vgg16_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 9.77E-04 | 4.96 | 12 | 207 | 84.3 | diff --git a/docs/benchmarks/jetson_xavier.md b/docs/benchmarks/jetson_xavier.md new file mode 100644 index 00000000..1c3cb2c6 --- /dev/null +++ b/docs/benchmarks/jetson_xavier.md @@ -0,0 +1,33 @@ +# Jetson Xavier + +| Name | Data Type | Input Shapes | torch2trt kwargs | Max Error | Throughput (PyTorch) | Throughput (TensorRT) | Latency (PyTorch) | Latency (TensorRT) | +|------|-----------|--------------|------------------|-----------|----------------------|-----------------------|-------------------|--------------------| +| torch2trt.tests.torchvision.classification.alexnet | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 7.63E-05 | 251 | 565 | 4.96 | 2.02 | +| torch2trt.tests.torchvision.classification.squeezenet1_0 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 9.77E-04 | 121 | 834 | 8.04 | 1.49 | +| torch2trt.tests.torchvision.classification.squeezenet1_1 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 9.77E-04 | 125 | 1.29e+03 | 8.01 | 1.02 | +| torch2trt.tests.torchvision.classification.resnet18 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 9.77E-03 | 136 | 722 | 7.33 | 1.64 | +| torch2trt.tests.torchvision.classification.resnet34 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.50E-01 | 77.8 | 396 | 12.9 | 2.79 | +| torch2trt.tests.torchvision.classification.resnet50 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.09E-01 | 55.8 | 326 | 17.9 | 3.37 | +| torch2trt.tests.torchvision.classification.resnet101 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 28.3 | 175 | 35.1 | 6.04 | +| torch2trt.tests.torchvision.classification.resnet152 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 18.8 | 122 | 53.2 | 8.57 | +| torch2trt.tests.torchvision.classification.densenet121 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 7.81E-03 | 20.9 | 76.6 | 47.5 | 13 | +| torch2trt.tests.torchvision.classification.densenet169 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.91E-03 | 14.8 | 41.7 | 66.7 | 23.7 | +| torch2trt.tests.torchvision.classification.densenet201 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 4.88E-03 | 12.6 | 30.2 | 79.1 | 33 | +| torch2trt.tests.torchvision.classification.densenet161 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 4.88E-03 | 16.1 | 43.7 | 62.1 | 23 | +| torch2trt.tests.torchvision.classification.vgg11 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.56E-03 | 84.8 | 201 | 12.1 | 5.24 | +| torch2trt.tests.torchvision.classification.vgg13 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.24E-03 | 71.1 | 165 | 14.3 | 6.34 | +| torch2trt.tests.torchvision.classification.vgg16 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.78E-03 | 61.5 | 139 | 16.5 | 7.46 | +| torch2trt.tests.torchvision.classification.vgg19 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.81E-03 | 54.1 | 120 | 18.7 | 8.61 | +| torch2trt.tests.torchvision.classification.vgg11_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.20E-03 | 81.5 | 200 | 12.5 | 5.27 | +| torch2trt.tests.torchvision.classification.vgg13_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.71E-03 | 67.5 | 165 | 15.1 | 6.33 | +| torch2trt.tests.torchvision.classification.vgg16_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': 
True} | 2.87E-03 | 58.3 | 139 | 17.4 | 7.48 |
+| torch2trt.tests.torchvision.classification.vgg19_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.44E-03 | 51.4 | 120 | 19.7 | 8.61 |
+| torch2trt.tests.torchvision.classification.mobilenet_v2 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 64.8 | 723 | 15.4 | 1.67 |
+| torch2trt.tests.torchvision.classification.shufflenet_v2_x0_5 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.53E-05 | 51.2 | 463 | 19.4 | 2.17 |
+| torch2trt.tests.torchvision.classification.shufflenet_v2_x1_0 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.53E-05 | 49.4 | 419 | 20.4 | 2.43 |
+| torch2trt.tests.torchvision.classification.shufflenet_v2_x1_5 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.53E-05 | 51.4 | 426 | 19.6 | 2.37 |
+| torch2trt.tests.torchvision.classification.shufflenet_v2_x2_0 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.53E-05 | 48.2 | 419 | 20.8 | 2.48 |
+| torch2trt.tests.torchvision.classification.mnasnet0_5 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.03E-06 | 67.8 | 883 | 14.9 | 1.4 |
+| torch2trt.tests.torchvision.classification.mnasnet0_75 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 67.6 | 751 | 14.8 | 1.6 |
+| torch2trt.tests.torchvision.classification.mnasnet1_0 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 65.7 | 667 | 15.2 | 1.77 |
+| torch2trt.tests.torchvision.classification.mnasnet1_3 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 67.4 | 573 | 15 | 2.02 |
diff --git a/docs/css/version-select.css b/docs/css/version-select.css
new file mode 100644
index 00000000..49079bf4
--- /dev/null
+++ b/docs/css/version-select.css
@@ -0,0 +1,5 @@
+@media only screen and (max-width:76.1875em) {
+  #version-selector {
+    padding: .6rem .8rem;
+  }
+}
diff --git a/docs/getting_started.md b/docs/getting_started.md
new file mode 100644
index 00000000..30adec06
--- /dev/null
+++ b/docs/getting_started.md
@@ -0,0 +1,84 @@
+# Getting Started
+
+Follow these steps to get started using torch2trt.
+
+## Installation
+
+!!! note
+
+    torch2trt depends on the TensorRT Python API. On Jetson, this is included with the latest JetPack. For desktop, please follow the [TensorRT Installation Guide](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html). You may also try installing torch2trt inside one of the NGC PyTorch docker containers for [Desktop](https://ngc.nvidia.com/catalog/containers/nvidia:pytorch) or [Jetson](https://ngc.nvidia.com/catalog/containers/nvidia:l4t-pytorch).
+
+### Option 1 - Without plugins
+
+To install without compiling plugins, call the following
+
+```bash
+git clone https://github.com/NVIDIA-AI-IOT/torch2trt
+cd torch2trt
+python setup.py install
+```
+
+### Option 2 - With plugins (experimental)
+
+To install with plugins to support some operations in PyTorch that are not natively supported with TensorRT, call the following
+
+!!! note
+
+    Please note, this currently only includes the interpolate plugin. This plugin requires PyTorch 1.3+ for serialization.
+
+```bash
+git clone https://github.com/NVIDIA-AI-IOT/torch2trt
+cd torch2trt
+sudo python setup.py install --plugins
+```
+
+## Basic Usage
+
+Below are some usage examples; for more, check out the [usage](usage) guide.
+
+### Convert
+
+```python
+import torch
+from torch2trt import torch2trt
+from torchvision.models.alexnet import alexnet
+
+# create some regular pytorch model...
+model = alexnet(pretrained=True).eval().cuda() + +# create example data +x = torch.ones((1, 3, 224, 224)).cuda() + +# convert to TensorRT feeding sample data as input +model_trt = torch2trt(model, [x]) +``` + +### Execute + +We can execute the returned ``TRTModule`` just like the original PyTorch model + +```python +y = model(x) +y_trt = model_trt(x) + +# check the output against PyTorch +print(torch.max(torch.abs(y - y_trt))) +``` + +### Save and load + +We can save the model as a ``state_dict``. + +```python +torch.save(model_trt.state_dict(), 'alexnet_trt.pth') +``` + +We can load the saved model into a ``TRTModule`` + +```python +from torch2trt import TRTModule + +model_trt = TRTModule() + +model_trt.load_state_dict(torch.load('alexnet_trt.pth')) +``` \ No newline at end of file diff --git a/docs/images/chart.svg b/docs/images/chart.svg new file mode 100644 index 00000000..28f0b295 --- /dev/null +++ b/docs/images/chart.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 00000000..91f8eca8 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,12 @@ +# torch2trt + + + +torch2trt is a PyTorch to TensorRT converter which utilizes the +TensorRT Python API. The converter is + +* Easy to use - Convert modules with a single function call ``torch2trt`` + +* Easy to extend - Write your own layer converter in Python and register it with ``@tensorrt_converter`` + +If you find an issue, please [let us know](https://github.com/NVIDIA-AI-IOT/torch2trt)! \ No newline at end of file diff --git a/docs/js/version-select.js b/docs/js/version-select.js new file mode 100644 index 00000000..794b5cc8 --- /dev/null +++ b/docs/js/version-select.js @@ -0,0 +1,49 @@ +window.addEventListener("DOMContentLoaded", function() { + // This is a bit hacky. Figure out the base URL from a known CSS file the + // template refers to... 
+ var ex = new RegExp("/?css/version-select.css$"); + var sheet = document.querySelector('link[href$="version-select.css"]'); + + var ABS_BASE_URL = sheet.href.replace(ex, ""); + var CURRENT_VERSION = ABS_BASE_URL.split("/").pop(); + + function makeSelect(options, selected) { + var select = document.createElement("select"); + select.classList.add("form-control"); + + options.forEach(function(i) { + var option = new Option(i.text, i.value, undefined, + i.value === selected); + select.add(option); + }); + + return select; + } + + var xhr = new XMLHttpRequest(); + xhr.open("GET", ABS_BASE_URL + "/../versions.json"); + xhr.onload = function() { + var versions = JSON.parse(this.responseText); + + var realVersion = versions.find(function(i) { + return i.version === CURRENT_VERSION || + i.aliases.includes(CURRENT_VERSION); + }).version; + + var select = makeSelect(versions.map(function(i) { + return {text: i.title, value: i.version}; + }), realVersion); + select.addEventListener("change", function(event) { + window.location.href = ABS_BASE_URL + "/../" + this.value; + }); + + var container = document.createElement("div"); + container.id = "version-selector"; + container.className = "md-nav__item"; + container.appendChild(select); + + var sidebar = document.querySelector(".md-nav--primary > .md-nav__list"); + sidebar.parentNode.insertBefore(container, sidebar); + }; + xhr.send(); +}); diff --git a/docs/usage/basic_usage.md b/docs/usage/basic_usage.md new file mode 100644 index 00000000..f2abed1c --- /dev/null +++ b/docs/usage/basic_usage.md @@ -0,0 +1 @@ +# Basic Usage \ No newline at end of file diff --git a/docs/usage/batch_size.md b/docs/usage/batch_size.md new file mode 100644 index 00000000..fabdff82 --- /dev/null +++ b/docs/usage/batch_size.md @@ -0,0 +1 @@ +# Batch Size \ No newline at end of file diff --git a/docs/usage/cpp_usage.md b/docs/usage/cpp_usage.md new file mode 100644 index 00000000..892e28a6 --- /dev/null +++ b/docs/usage/cpp_usage.md @@ -0,0 +1,2 @@ +# Cpp Usage + diff --git a/docs/usage/custom_converter.md b/docs/usage/custom_converter.md new file mode 100644 index 00000000..2a1883f8 --- /dev/null +++ b/docs/usage/custom_converter.md @@ -0,0 +1 @@ +# Custom Converter \ No newline at end of file diff --git a/docs/usage/network_visualization.md b/docs/usage/network_visualization.md new file mode 100644 index 00000000..3d635a47 --- /dev/null +++ b/docs/usage/network_visualization.md @@ -0,0 +1 @@ +# Network Visualization \ No newline at end of file diff --git a/docs/usage/profiling.md b/docs/usage/profiling.md new file mode 100644 index 00000000..f6c15c4d --- /dev/null +++ b/docs/usage/profiling.md @@ -0,0 +1 @@ +# Profiling \ No newline at end of file diff --git a/docs/usage/reduced_precision.md b/docs/usage/reduced_precision.md new file mode 100644 index 00000000..1b31594f --- /dev/null +++ b/docs/usage/reduced_precision.md @@ -0,0 +1 @@ +# Reduced Precision \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 00000000..78e99d68 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,57 @@ +site_name: torch2trt +theme: + name: "material" + palette: + primary: green + secondary: light green + +repo_url: https://github.com/NVIDIA-AI-IOT/torch2trt + +plugins: + - search + +use_directory_urls: False + +edit_uri: blob/master +markdown_extensions: + - pymdownx.tabbed + - pymdownx.keys + - pymdownx.snippets + - pymdownx.inlinehilite + - pymdownx.highlight: + use_pygments: true + - admonition + - pymdownx.details + - pymdownx.superfences + - attr_list + +# 
use_directory_urls - False to fix broken raw html image links +# https://github.com/mkdocs/mkdocs/issues/991 + + +nav: + + - Home: index.md + - Getting Started: getting_started.md + - Usage: + - Basic Usage: usage/basic_usage.md + - Reduced Precision: usage/reduced_precision.md + - Batch Size: usage/batch_size.md + - Cpp Usage: usage/cpp_usage.md + - Custom Converter: usage/custom_converter.md + - Profiling: usage/profiling.md + - Network Visualization: usage/network_visualization.md + - Converters: converters.md + - Benchmarks: + - Jetson Nano: benchmarks/jetson_nano.md + - Jetson Xavier: benchmarks/jetson_xavier.md + +extra_css: + - css/version-select.css +extra_javascript: + - js/version-select.js + +google_analytics: + - UA-135919510-3 + - auto + \ No newline at end of file diff --git a/scripts/build_docs.sh b/scripts/build_docs.sh new file mode 100755 index 00000000..a1883671 --- /dev/null +++ b/scripts/build_docs.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +TAG=$1 + +# python3 scripts/dump_converters.py --tag=$TAG > docs/converters.md + +mike deploy $TAG \ No newline at end of file diff --git a/scripts/dump_converters.py b/scripts/dump_converters.py index ba271d7d..cd01465b 100644 --- a/scripts/dump_converters.py +++ b/scripts/dump_converters.py @@ -6,17 +6,28 @@ torch2trt = SourceFileLoader("torch2trt", "torch2trt/__init__.py").load_module() # to load relative to root +HEADER = """ +# Converters + +This table contains a list of supported PyTorch methods and their associated converters. + +If your model is not converting, a good start in debugging would be to see if it contains a method not listed +in this table. You may also find these a useful reference when writing your own converters. + +| Method | Converter | +|--------|-----------|""" + if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--github', type=str, default='https://github.com/NVIDIA-AI-IOT/torch2trt') + parser.add_argument('--tag', type=str, default='master') args = parser.parse_args() - - print('| Method | Converter |') - print('|--------|-----------|') - + + print(HEADER) + for method, entry in torch2trt.CONVERTERS.items(): if not entry['is_real']: @@ -25,19 +36,19 @@ converter = entry['converter'] # get commit hash - p = subprocess.Popen(['git', 'rev-parse', 'HEAD'], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - commit, err = p.communicate() - commit = commit.decode('utf-8').strip('\n') +# p = subprocess.Popen(['git', 'rev-parse', 'HEAD'], +# stdout=subprocess.PIPE, +# stderr=subprocess.PIPE) +# commit, err = p.communicate() +# commit = commit.decode('utf-8').strip('\n') # get github URL url = '{github}/blob/{commit}/{relpath}#L{lineno}'.format( github=args.github, - commit=str(commit), + commit=args.tag, relpath=os.path.relpath(converter.__code__.co_filename, os.path.abspath('.')), lineno=converter.__code__.co_firstlineno) - print('| {method} | [{converter}]({url}) |'.format( + print('| ``{method}`` | [``{converter}``]({url}) |'.format( method=method, converter=converter.__name__, url=url)) diff --git a/scripts/push_docs.sh b/scripts/push_docs.sh new file mode 100755 index 00000000..9d5fc870 --- /dev/null +++ b/scripts/push_docs.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +TAG=$1 + +python3 scripts/dump_converters.py > docs/converters.md + +mike deploy $TAG --push diff --git a/scripts/test_docs.sh b/scripts/test_docs.sh new file mode 100755 index 00000000..b9be6305 --- /dev/null +++ b/scripts/test_docs.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +TAG=$1 + +mike set-default $TAG +mike serve 
--dev-addr=0.0.0.0:8000
\ No newline at end of file
From 1a59749d66e5dbdd020f32b4d1abf29ae370cfb1 Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Wed, 2 Sep 2020 09:27:15 +0000
Subject: [PATCH 311/355] changelog contributing

---
 CHANGELOG.md    | 1 +
 CONTRIBUTING.md | 1 +
 2 files changed, 2 insertions(+)
 create mode 100644 CHANGELOG.md
 create mode 100644 CONTRIBUTING.md

diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 00000000..5ddad421
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1 @@
+# Changelog
\ No newline at end of file
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 00000000..4d218d9f
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1 @@
+# Contributing
\ No newline at end of file
From 93abd9fe12a411c91005c65302518e8fd944fece Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Wed, 2 Sep 2020 10:06:45 +0000
Subject: [PATCH 312/355] docs

---
 CHANGELOG.md                        |  2 +-
 CONTRIBUTING.md                     | 52 ++++++++++++++++++++++++++++-
 docs/CHANGELOG.md                   |  1 +
 docs/CONTRIBUTING.md                |  1 +
 docs/usage/debugging.md             |  1 +
 docs/usage/network_visualization.md |  1 -
 mkdocs.yml                          |  4 ++-
 7 files changed, 58 insertions(+), 4 deletions(-)
 create mode 120000 docs/CHANGELOG.md
 create mode 120000 docs/CONTRIBUTING.md
 create mode 100644 docs/usage/debugging.md
 delete mode 100644 docs/usage/network_visualization.md

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5ddad421..252d4483 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1 +1 @@
-# Changelog
\ No newline at end of file
+# Changes
\ No newline at end of file
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 4d218d9f..cadcb869 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1 +1,51 @@
-# Contributing
\ No newline at end of file
+# Contributing
+
+## Submit an Issue
+
+torch2trt is use case driven. We originally created it to solve
+use cases related to NVIDIA Jetson, but the layer support has grown
+largely since its release and we've found that it has
+helped many other developers as well.
+
+The growth of torch2trt has been largely driven by issues submitted on [GitHub](https://github.com/NVIDIA-AI-IOT/torch2trt/issues).
+We learn a lot from the reported issues. Submitting an issue it is one of the best ways to begin contributing to torch2trt.
+
+The reported issues are typically are one of the following,
+
+* A bug or unexpected result
+* A model with unsupported layers
+
+If you report an issue, we typically find the following information helpful
+
+* PyTorch version
+* TensorRT version
+* Platform (ie: Jetson Nano)
+* The PyTorch Module you're attempting to convert
+* The steps taken to convert the PyTorch module
+
+If you're not sure how to provide any of these pieces of information, don't worry. Just open the pull request
+and we're happy to discuss and help work out the details.
+
+## Ask a Question
+
+Another great way to contribute is to ask a question on [GitHub](https://github.com/NVIDIA-AI-IOT/torch2trt/issues).
+There are often other developers who share your question, and they may find the discussion helpful. This also
+helps us gauge feature interest and identify gaps in documentation.
+
+
+## Submit a Pull Request
+
+torch2trt is use case driven and has limited maintenance, for this reason we value community contributions greatly.
+Another great way to contribute is by submitting a pull request.
Pull requests which are most likely to be accepted are + +* A new converter +* A test case +* A bug fix + +If you add a new converter, it is best to include a few test +cases that cross validate the converter against the original PyTorch. We provide a utility function to do this, +as described in the [Custom Converter](usage/custom_converter.md) usage guide. + +Ideally pull requests solve one thing at a time. This makes it easy +to evaluate the impact that the changes have on the project step-by-step. The more confident we are that +the changes will not adversely impact the experience of other developers, the more likely we are to accept them. \ No newline at end of file diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md new file mode 120000 index 00000000..04c99a55 --- /dev/null +++ b/docs/CHANGELOG.md @@ -0,0 +1 @@ +../CHANGELOG.md \ No newline at end of file diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md new file mode 120000 index 00000000..44fcc634 --- /dev/null +++ b/docs/CONTRIBUTING.md @@ -0,0 +1 @@ +../CONTRIBUTING.md \ No newline at end of file diff --git a/docs/usage/debugging.md b/docs/usage/debugging.md new file mode 100644 index 00000000..94853297 --- /dev/null +++ b/docs/usage/debugging.md @@ -0,0 +1 @@ +# Debugging \ No newline at end of file diff --git a/docs/usage/network_visualization.md b/docs/usage/network_visualization.md deleted file mode 100644 index 3d635a47..00000000 --- a/docs/usage/network_visualization.md +++ /dev/null @@ -1 +0,0 @@ -# Network Visualization \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index 78e99d68..c8534828 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -40,11 +40,13 @@ nav: - Cpp Usage: usage/cpp_usage.md - Custom Converter: usage/custom_converter.md - Profiling: usage/profiling.md - - Network Visualization: usage/network_visualization.md + - Debugging: usage/debugging.md - Converters: converters.md - Benchmarks: - Jetson Nano: benchmarks/jetson_nano.md - Jetson Xavier: benchmarks/jetson_xavier.md + - Contributing: CONTRIBUTING.md + - Changes: CHANGELOG.md extra_css: - css/version-select.css From 384ac89dcb9fdc4a5fd65f50457d723ea810accb Mon Sep 17 00:00:00 2001 From: John Welsh Date: Wed, 2 Sep 2020 10:10:01 +0000 Subject: [PATCH 313/355] typos --- CONTRIBUTING.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cadcb869..cfea8844 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -10,7 +10,7 @@ helped many other developers as well. The growth of torch2trt has been largely driven by issues submitted on [GitHub](https://github.com/NVIDIA-AI-IOT/torch2trt/issues). We learn a lot from the reported issues. Submitting an issue it is one of the best ways to begin contributing to torch2trt. -The reported issues are typically are one of the following, +The reported issues typically are one of the following, * A bug or unexpected result * A model with unsupported layers @@ -23,7 +23,7 @@ If you report an issue, we typically find the following information helpful * The PyTorch Module you're attempting to convert * The steps taken to convert the PyTorch module -If you're not sure how to provide any of these pieces of information, don't worry. Just open the pull request +If you're not sure how to provide any of these pieces of information, don't worry. Just open the issue and we're happy to discuss and help work out the details. 
 ## Ask a Question
From 656805b406d716453dbcdfaa6eb48bbb500b760d Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Wed, 2 Sep 2020 11:19:46 +0000
Subject: [PATCH 314/355] docs

---
 docs/see_also.md | 55 ++++++++++++++++++++++++++++++++++++++++++++++++
 mkdocs.yml       |  1 +
 2 files changed, 56 insertions(+)
 create mode 100644 docs/see_also.md

diff --git a/docs/see_also.md b/docs/see_also.md
new file mode 100644
index 00000000..064da014
--- /dev/null
+++ b/docs/see_also.md
@@ -0,0 +1,55 @@
+# See Also
+
+!!! note
+
+    The state of these converters may change over time. We provide this information here with the hope that it will help shed light on the landscape of tools available for optimizing PyTorch models with TensorRT.
+    If you find this information helpful or outdated / misleading, please let us know.
+
+In addition to torch2trt, there are other workflows for optimizing your PyTorch model with TensorRT.
+
+The other converters we are aware of are
+
+* [ONNX to TensorRT](https://github.com/onnx/onnx-tensorrt)
+
+!!! tip
+
+    Since the ONNX parser ships with TensorRT, we have included a convenience method for using this
+    workflow with torch2trt. If you want to quickly try the ONNX method using the torch2trt interface, just call ``torch2trt(..., use_onnx=True)``.
+    This will perform conversion on the module by exporting the model using PyTorch's JIT tracer,
+    and parsing with TensorRT's ONNX parser.
+
+* [TRTorch](https://github.com/NVIDIA/TRTorch)
+
+Which one you use depends largely on your use case. The differences often come down to
+
+## Layer support
+
+Modern deep learning frameworks are large, and caveats often arise
+when converting between frameworks using a given workflow. These could include
+limitations in serialization or parsing formats. Or in some instances, it may be possible that
+the layer could be supported, but it has just not been done yet. TRTorch is strong
+in the sense that it will default to the original PyTorch method for layers
+which are not converted to TensorRT. The best way to know
+which conversion method works for you is to try converting your model.
+
+## Feature support
+
+TensorRT is evolving and the conversion workflows may have varying levels
+of feature support. In some instances, you may wish to use the latest features of TensorRT, like dynamic shapes,
+but it is not supported in torch2trt or the interface has not yet been exposed. In this
+instance, we recommend checking to see if it is supported by one of the other workflows. The ONNX
+converter is typically strong in this regard, since the parser is distributed with TensorRT.
+
+!!! note
+
+    If there is a TensorRT feature you wish to see in torch2trt, please let us know. We cannot guarantee this will be done, but it helps us gauge interest.
+
+## Extensibility / Ease of Use
+
+In case none of the converters satisfy your use case, you may find it necessary to adapt
+the converter to fit your needs. This is very intuitive with torch2trt,
+since it is done inline with Python, and there are many [examples](converters) to reference. If you know
+how the original PyTorch method works, and have the TensorRT Python API on hand, it is relatively straightforward to adapt torch2trt to your needs.
+The extensibility is often helpful when you want to implement a converter that is specific to the
+context the layer appears in.
+
diff --git a/mkdocs.yml b/mkdocs.yml
index c8534828..1790baed 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -47,6 +47,7 @@ nav:
     - Jetson Xavier: benchmarks/jetson_xavier.md
   - Contributing: CONTRIBUTING.md
   - Changes: CHANGELOG.md
+  - See Also: see_also.md
 
 extra_css:
   - css/version-select.css
From 571cb00947d31adf0ad7efb52d67671f00c6b59d Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Wed, 2 Sep 2020 22:57:59 +0000
Subject: [PATCH 315/355] docs

---
 docs/getting_started.md   | 56 ++-------------------------------------
 docs/usage/basic_usage.md | 51 ++++++++++++++++++++++++++++-
 mkdocs.yml                | 12 ++++-----
 3 files changed, 58 insertions(+), 61 deletions(-)

diff --git a/docs/getting_started.md b/docs/getting_started.md
index 30adec06..75ce7335 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -2,13 +2,11 @@
 
 Follow these steps to get started using torch2trt.
 
-## Installation
-
 !!! note
 
     torch2trt depends on the TensorRT Python API. On Jetson, this is included with the latest JetPack. For desktop, please follow the [TensorRT Installation Guide](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html). You may also try installing torch2trt inside one of the NGC PyTorch docker containers for [Desktop](https://ngc.nvidia.com/catalog/containers/nvidia:pytorch) or [Jetson](https://ngc.nvidia.com/catalog/containers/nvidia:l4t-pytorch).
 
-### Option 1 - Without plugins
+### Install Without plugins
 
 To install without compiling plugins, call the following
 
@@ -18,7 +16,7 @@ cd torch2trt
 python setup.py install
 ```
 
-### Option 2 - With plugins (experimental)
+### Install With plugins
 
 To install with plugins to support some operations in PyTorch that are not natively supported with TensorRT, call the following
 
@@ -32,53 +30,3 @@ cd torch2trt
 sudo python setup.py install --plugins
 ```
 
diff --git a/docs/usage/basic_usage.md b/docs/usage/basic_usage.md
index f2abed1c..4a63c84d 100644
--- a/docs/usage/basic_usage.md
+++ b/docs/usage/basic_usage.md
@@ -1 +1,50 @@
-# Basic Usage
\ No newline at end of file
+## Basic Usage
+
+Below are some usage examples; for more, check out the [usage](usage) guide.
+
+### Convert
+
+```python
+import torch
+from torch2trt import torch2trt
+from torchvision.models.alexnet import alexnet
+
+# create some regular pytorch model...
+model = alexnet(pretrained=True).eval().cuda()
+
+# create example data
+x = torch.ones((1, 3, 224, 224)).cuda()
+
+# convert to TensorRT feeding sample data as input
+model_trt = torch2trt(model, [x])
+```
+
+### Execute
+
+We can execute the returned ``TRTModule`` just like the original PyTorch model
+
+```python
+y = model(x)
+y_trt = model_trt(x)
+
+# check the output against PyTorch
+print(torch.max(torch.abs(y - y_trt)))
+```
+
+### Save and load
+
+We can save the model as a ``state_dict``.
+
+```python
+torch.save(model_trt.state_dict(), 'alexnet_trt.pth')
+```
+
+We can load the saved model into a ``TRTModule``
+
+```python
+from torch2trt import TRTModule
+
+model_trt = TRTModule()
+
+model_trt.load_state_dict(torch.load('alexnet_trt.pth'))
+```
\ No newline at end of file
diff --git a/mkdocs.yml b/mkdocs.yml
index 1790baed..64c1116b 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -35,12 +35,12 @@ nav:
   - Getting Started: getting_started.md
   - Usage:
     - Basic Usage: usage/basic_usage.md
-    - Reduced Precision: usage/reduced_precision.md
-    - Batch Size: usage/batch_size.md
-    - Cpp Usage: usage/cpp_usage.md
-    - Custom Converter: usage/custom_converter.md
-    - Profiling: usage/profiling.md
-    - Debugging: usage/debugging.md
+#    - Reduced Precision: usage/reduced_precision.md
+#    - Batch Size: usage/batch_size.md
+#    - Cpp Usage: usage/cpp_usage.md
+#    - Custom Converter: usage/custom_converter.md
+#    - Profiling: usage/profiling.md
+#    - Debugging: usage/debugging.md
   - Converters: converters.md
   - Benchmarks:
     - Jetson Nano: benchmarks/jetson_nano.md
From a263fe071d18b31275dc49bf2431e66e301f2019 Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Wed, 2 Sep 2020 23:29:12 +0000
Subject: [PATCH 316/355] docs build contrib

---
 CONTRIBUTING.md               | 64 +++++++++++++++++--
 scripts/build_docs.sh         |  7 +-
 scripts/release_build_docs.sh |  7 ++
 .../{push_docs.sh => release_push_docs.sh}    |  0
 scripts/release_test_docs.sh  |  6 ++
 scripts/test_docs.sh          |  3 +-
 6 files changed, 77 insertions(+), 10 deletions(-)
 mode change 100755 => 100644 scripts/build_docs.sh
 create mode 100755 scripts/release_build_docs.sh
 rename scripts/{push_docs.sh => release_push_docs.sh} (100%)
 create mode 100755 scripts/release_test_docs.sh
 mode change 100755 => 100644 scripts/test_docs.sh

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index cfea8844..ec573589 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,6 +1,8 @@
 # Contributing
 
-## Submit an Issue
+## Forms of contribution
+
+### Submit an Issue
 
 torch2trt is use case driven. We originally created it to solve
 use cases related to NVIDIA Jetson, but the layer support has grown
@@ -26,14 +28,14 @@ If you report an issue, we typically find the following information helpful
 If you're not sure how to provide any of these pieces of information, don't worry. Just open the issue
 and we're happy to discuss and help work out the details.
 
-## Ask a Question
+### Ask a Question
 
 Another great way to contribute is to ask a question on [GitHub](https://github.com/NVIDIA-AI-IOT/torch2trt/issues).
 There are often other developers who share your question, and they may find the discussion helpful. This also
 helps us gauge feature interest and identify gaps in documentation.
 
-## Submit a Pull Request
+### Submit a Pull Request
 
 torch2trt is use case driven and has limited maintenance, for this reason we value community contributions greatly.
 Another great way to contribute is by submitting a pull request.
 Pull requests which are most likely to be accepted are
 
@@ -48,4 +50,58 @@ as described in the [Custom Converter](usage/custom_converter.md) usage guide.
 
 Ideally pull requests solve one thing at a time.  This makes it easy to evaluate the impact that the changes
 have on the project step-by-step.  The more confident we are that
-the changes will not adversely impact the experience of other developers, the more likely we are to accept them. \ No newline at end of file
+the changes will not adversely impact the experience of other developers, the more likely we are to accept them.
+
+## Running module test cases
+
+Before any change is accepted, we run the test cases on at least one platform.  This performs a large number
+of cross validation checks against PyTorch.  To do this
+
+```bash
+python3 -m torch2trt.test --name=converters --tolerance=1e-2
+```
+
+This will not hard-fail, but will highlight any build errors or max error checks.  It is helpful if you include
+the status of this command in any pull-request, as well as system information like
+
+* PyTorch version
+* TensorRT version
+* Platform (ie: Jetson Nano)
+
+## Testing documentation
+
+If you have a change that modifies the documentation, it is relatively straightforward to test.  We
+use ``mkdocs-material`` for documentation, which parses markdown files in the ``docs`` folder.
+
+To view the docs, simply call
+
+```
+./scripts/test_docs.sh
+```
+
+And then navigate to ``<ip_address>:8000``.
+
+!!! note
+
+    This will not include dynamically generated documentation pages like the converters page.
+    These contain cross reference links to the GitHub source code.  If you want to test these
+    you can call
+
+    ```bash
+    ./scripts/build_docs.sh <github_url> <tag>
+    ```
+
+    Pointing to the public reflection
+    of your local repository.
For example, if we're working off the upstream master branch, we
+    would call
+
+    ```bash
+    ./scripts/build_docs.sh https://github.com/NVIDIA-AI-IOT/torch2trt master
+    ```
+
+    If your changes are pushed to your fork, you would do
+
+    ```bash
+    ./scripts/build_docs.sh https://github.com/<your_github_username>/torch2trt my_branch
+    ```
+ \ No newline at end of file
diff --git a/scripts/build_docs.sh b/scripts/build_docs.sh
old mode 100755
new mode 100644
index a1883671..52a8486a
--- a/scripts/build_docs.sh
+++ b/scripts/build_docs.sh
@@ -1,7 +1,6 @@
 #!/bin/bash
 
-TAG=$1
+GITHUB=$1
+TAG=$2
 
-# python3 scripts/dump_converters.py --tag=$TAG > docs/converters.md
-
-mike deploy $TAG \ No newline at end of file
+python3 scripts/dump_converters.py --github=$GITHUB --tag=$TAG > docs/converters.md
diff --git a/scripts/release_build_docs.sh b/scripts/release_build_docs.sh
new file mode 100755
index 00000000..a1883671
--- /dev/null
+++ b/scripts/release_build_docs.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+TAG=$1
+
+# python3 scripts/dump_converters.py --tag=$TAG > docs/converters.md
+
+mike deploy $TAG \ No newline at end of file
diff --git a/scripts/push_docs.sh b/scripts/release_push_docs.sh
similarity index 100%
rename from scripts/push_docs.sh
rename to scripts/release_push_docs.sh
diff --git a/scripts/release_test_docs.sh b/scripts/release_test_docs.sh
new file mode 100755
index 00000000..b9be6305
--- /dev/null
+++ b/scripts/release_test_docs.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+TAG=$1
+
+mike set-default $TAG
+mike serve --dev-addr=0.0.0.0:8000 \ No newline at end of file
diff --git a/scripts/test_docs.sh b/scripts/test_docs.sh
old mode 100755
new mode 100644
index b9be6305..8fa6ab41
--- a/scripts/test_docs.sh
+++ b/scripts/test_docs.sh
@@ -2,5 +2,4 @@
 
 TAG=$1
 
-mike set-default $TAG
-mike serve --dev-addr=0.0.0.0:8000 \ No newline at end of file
+mkdocs serve --dev-addr=0.0.0.0:8000

From 0655a80816f9f34833288d9125ca1fbeb3632fa1 Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Wed, 2 Sep 2020 23:29:34 +0000
Subject: [PATCH 317/355] build docs

---
 scripts/build_docs.sh | 0
 scripts/test_docs.sh  | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 scripts/build_docs.sh
 mode change 100644 => 100755 scripts/test_docs.sh

diff --git a/scripts/build_docs.sh b/scripts/build_docs.sh
old mode 100644
new mode 100755
diff --git a/scripts/test_docs.sh b/scripts/test_docs.sh
old mode 100644
new mode 100755

From 3d496ae0a062cdfd8e5f40e62f41fb156ab71f74 Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Wed, 2 Sep 2020 23:49:44 +0000
Subject: [PATCH 318/355] docs merge

---
 CONTRIBUTING.md | 17 -----------------
 mkdocs.yml      |  9 ---------
 2 files changed, 26 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index bf6817e2..92ca8891 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,12 +1,8 @@
 # Contributing
 
-<<<<<<< HEAD
 ## Forms of contribution
 
 ### Submit an Issue
-=======
-## Submit an Issue
->>>>>>> 4933faf05cd9e5d9aef2b23e7fe692bfda5fbc06
 
 torch2trt is use case driven.  We originally created it to solve
 use cases related to NVIDIA Jetson, but the layer support has grown
@@ -32,22 +28,13 @@ If you report an issue, we typically find the following information helpful
 
 If you're not sure how to provide any of these pieces of information, don't worry.  Just open the issue and
 we're happy to discuss and help work out the details.
-<<<<<<< HEAD
 ### Ask a Question
-=======
-## Ask a Question
->>>>>>> 4933faf05cd9e5d9aef2b23e7fe692bfda5fbc06
 
 Another great way to contribute is to ask a question on [GitHub](https://github.com/NVIDIA-AI-IOT/torch2trt/issues).
 There are often other developers who share your question, and they may find the discussion helpful.  This
 also helps us gauge feature interest and identify gaps in documentation.
-
-<<<<<<< HEAD
 ### Submit a Pull Request
-=======
-## Submit a Pull Request
->>>>>>> 4933faf05cd9e5d9aef2b23e7fe692bfda5fbc06
 
 torch2trt is use case driven and has limited maintenance; for this reason, we value community contributions greatly.  Another great way to contribute is by submitting a pull request.
 
 Pull requests which are most likely to be accepted are
 
@@ -62,7 +49,6 @@ as described in the [Custom Converter](usage/custom_converter.md) usage guide.
 
 Ideally pull requests solve one thing at a time.  This makes it easy to evaluate the impact that the changes
 have on the project step-by-step.  The more confident we are that
-<<<<<<< HEAD
 the changes will not adversely impact the experience of other developers, the more likely we are to accept them.
 
 ## Running module test cases
@@ -118,6 +104,3 @@ And then navigate to ``<ip_address>:8000``.
     ./scripts/build_docs.sh https://github.com/<your_github_username>/torch2trt my_branch
     ```
 
-=======
-the changes will not adversely impact the experience of other developers, the more likely we are to accept them.
->>>>>>> 4933faf05cd9e5d9aef2b23e7fe692bfda5fbc06
diff --git a/mkdocs.yml b/mkdocs.yml
index a0b7896e..64c1116b 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -35,21 +35,12 @@ nav:
   - Getting Started: getting_started.md
   - Usage:
     - Basic Usage: usage/basic_usage.md
-<<<<<<< HEAD
 #    - Reduced Precision: usage/reduced_precision.md
 #    - Batch Size: usage/batch_size.md
 #    - Cpp Usage: usage/cpp_usage.md
 #    - Custom Converter: usage/custom_converter.md
 #    - Profiling: usage/profiling.md
 #    - Debugging: usage/debugging.md
-=======
-    - Reduced Precision: usage/reduced_precision.md
-    - Batch Size: usage/batch_size.md
-    - Cpp Usage: usage/cpp_usage.md
-    - Custom Converter: usage/custom_converter.md
-    - Profiling: usage/profiling.md
-    - Debugging: usage/debugging.md
->>>>>>> 4933faf05cd9e5d9aef2b23e7fe692bfda5fbc06
   - Converters: converters.md
   - Benchmarks:
     - Jetson Nano: benchmarks/jetson_nano.md

From 3efb89d0149dad8a54fbcc96ffff479f31a0d521 Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Wed, 2 Sep 2020 23:56:56 +0000
Subject: [PATCH 319/355] merge fixes

---
 docs/getting_started.md   | 66 ---------------------------------------
 docs/usage/basic_usage.md | 12 +++----
 2 files changed, 4 insertions(+), 74 deletions(-)

diff --git a/docs/getting_started.md b/docs/getting_started.md
index 10908392..75ce7335 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -2,20 +2,11 @@
 
 Follow these steps to get started using torch2trt.
 
-<<<<<<< HEAD
-=======
-## Installation
-
->>>>>>> 4933faf05cd9e5d9aef2b23e7fe692bfda5fbc06
 !!! note
 
     torch2trt depends on the TensorRT Python API.  On Jetson, this is included with the latest JetPack.  For desktop, please follow the [TensorRT Installation Guide](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html).  You may also try installing torch2trt inside one of the NGC PyTorch docker containers for [Desktop](https://ngc.nvidia.com/catalog/containers/nvidia:pytorch) or [Jetson](https://ngc.nvidia.com/catalog/containers/nvidia:l4t-pytorch).
-<<<<<<< HEAD
 ### Install Without plugins
-=======
-### Option 1 - Without plugins
->>>>>>> 4933faf05cd9e5d9aef2b23e7fe692bfda5fbc06
 
 To install without compiling plugins, call the following
 
@@ -25,11 +16,7 @@ cd torch2trt
 python setup.py install
 ```
 
-<<<<<<< HEAD
 ### Install With plugins
-=======
-### Option 2 - With plugins (experimental)
->>>>>>> 4933faf05cd9e5d9aef2b23e7fe692bfda5fbc06
 
 To install with plugins to support some operations in PyTorch that are not natively supported with TensorRT, call the following
 
@@ -43,56 +30,3 @@ cd torch2trt
 sudo python setup.py install --plugins
 ```
 
-<<<<<<< HEAD
-=======
-## Basic Usage
-
-Below are some usage examples; for more, check out the [usage](usage) guide.
-
-### Convert
-
-```python
-import torch
-from torch2trt import torch2trt
-from torchvision.models.alexnet import alexnet
-
-# create some regular pytorch model...
-model = alexnet(pretrained=True).eval().cuda()
-
-# create example data
-x = torch.ones((1, 3, 224, 224)).cuda()
-
-# convert to TensorRT feeding sample data as input
-model_trt = torch2trt(model, [x])
-```
-
-### Execute
-
-We can execute the returned ``TRTModule`` just like the original PyTorch model
-
-```python
-y = model(x)
-y_trt = model_trt(x)
-
-# check the output against PyTorch
-print(torch.max(torch.abs(y - y_trt)))
-```
-
-### Save and load
-
-We can save the model as a ``state_dict``.
-
-```python
-torch.save(model_trt.state_dict(), 'alexnet_trt.pth')
-```
-
-We can load the saved model into a ``TRTModule``
-
-```python
-from torch2trt import TRTModule
-
-model_trt = TRTModule()
-
-model_trt.load_state_dict(torch.load('alexnet_trt.pth'))
-```
->>>>>>> 4933faf05cd9e5d9aef2b23e7fe692bfda5fbc06
diff --git a/docs/usage/basic_usage.md b/docs/usage/basic_usage.md
index 04bbcdac..674ea3ee 100644
--- a/docs/usage/basic_usage.md
+++ b/docs/usage/basic_usage.md
@@ -1,9 +1,8 @@
-<<<<<<< HEAD
-## Basic Usage
+# Basic Usage
 
 Below are some usage examples; for more, check out the [usage](usage) guide.
 
-### Convert
+## Convert
 
 ```python
 import torch
@@ -20,7 +19,7 @@ x = torch.ones((1, 3, 224, 224)).cuda()
 model_trt = torch2trt(model, [x])
 ```
 
-### Execute
+## Execute
 
 We can execute the returned ``TRTModule`` just like the original PyTorch model
 
@@ -32,7 +31,7 @@ y_trt = model_trt(x)
 print(torch.max(torch.abs(y - y_trt)))
 ```
 
-### Save and load
+## Save and load
 
 We can save the model as a ``state_dict``.
 
@@ -49,6 +48,3 @@ model_trt = TRTModule()
 
 model_trt.load_state_dict(torch.load('alexnet_trt.pth'))
 ```
-=======
-# Basic Usage
->>>>>>> 4933faf05cd9e5d9aef2b23e7fe692bfda5fbc06

From 5aae5762bfb7e0acfc1328df7a3091877e26ac99 Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Thu, 3 Sep 2020 00:10:05 +0000
Subject: [PATCH 320/355] asdf

---
 docs/usage/custom_converter.md | 47 +++++++++++++++++++++++++++++++++-
 scripts/build_docs.sh          |  8 ------
 scripts/test_docs.sh           |  9 +------
 3 files changed, 47 insertions(+), 17 deletions(-)

diff --git a/docs/usage/custom_converter.md b/docs/usage/custom_converter.md
index 2a1883f8..8270c79d 100644
--- a/docs/usage/custom_converter.md
+++ b/docs/usage/custom_converter.md
@@ -1 +1,46 @@
-# Custom Converter \ No newline at end of file
+# Custom Converter
+
+This page details how to extend or modify the behavior of torch2trt by implementing and registering
+custom converters.
+
+## Background
+
+torch2trt works by attaching conversion functions (like ``convert_ReLU``) to the original
+PyTorch functional calls (like ``torch.nn.ReLU.forward``).
The sample input data is passed
+through the network, just as before, except now whenever a registered function (``torch.nn.ReLU.forward``)
+is encountered, the corresponding converter (``convert_ReLU``) is also called afterwards.  The converter
+is passed the arguments and return statement of the original PyTorch function, as well as the TensorRT
+network that is being constructed.  The input tensors to the original PyTorch function are modified to
+have an attribute ``_trt``, which is the TensorRT counterpart to the PyTorch tensor.  The conversion function
+uses this ``_trt`` to add layers to the TensorRT network, and then sets the ``_trt`` attribute for
+relevant output tensors.  Once the model is fully executed, the final tensors returned are marked as outputs
+of the TensorRT network, and the optimized TensorRT engine is built.
+
+## Add a custom converter
+
+Here we show how to add a converter for the ``ReLU`` module using the TensorRT
+Python API.
+
+```python
+import tensorrt as trt
+from torch2trt import tensorrt_converter
+
+@tensorrt_converter('torch.nn.ReLU.forward')
+def convert_ReLU(ctx):
+    input = ctx.method_args[1]
+    output = ctx.method_return
+    layer = ctx.network.add_activation(input=input._trt, type=trt.ActivationType.RELU)
+    output._trt = layer.get_output(0)
+```
+
+The converter takes one argument, a ``ConversionContext``, which will contain
+the following
+
+* ``ctx.network`` - The TensorRT network that is being constructed.
+* ``ctx.method_args`` - Positional arguments that were passed to the specified PyTorch function.  The ``_trt`` attribute is set for relevant input tensors.
+* ``ctx.method_kwargs`` - Keyword arguments that were passed to the specified PyTorch function.
+* ``ctx.method_return`` - The value returned by the specified PyTorch function.  The converter must set the ``_trt`` attribute where relevant.
+
+Please see the [converters](../converters.md) page for a list of implemented converters and links to their source code.  These may help
+in learning how to write converters.
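The same pattern applies to free functions, not just module methods. As an illustrative sketch (not part of the patch above), here is what a converter for ``torch.add`` could look like, modeled on the converter torch2trt itself ships in ``torch2trt/converters/add.py``; the helper ``add_missing_trt_tensors`` creates constant TensorRT tensors for plain Python scalars so that expressions like ``x + 1`` also convert:

```python
import tensorrt as trt
from torch2trt import tensorrt_converter
from torch2trt.torch2trt import add_missing_trt_tensors

@tensorrt_converter('torch.add')
def convert_add_sketch(ctx):
    # torch.add(input_a, input_b): either positional argument may be a tensor or a scalar
    input_a = ctx.method_args[0]
    input_b = ctx.method_args[1]
    output = ctx.method_return

    # ensure both operands have TensorRT counterparts (creates constants for scalars)
    input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b])

    # add an elementwise SUM layer and attach its output to the returned tensor
    layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.SUM)
    output._trt = layer.get_output(0)
```

Note that the converter actually shipped with torch2trt additionally broadcasts the two operands to a common number of dimensions before adding the elementwise layer, so this sketch only covers same-shape inputs.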
diff --git a/scripts/build_docs.sh b/scripts/build_docs.sh index 85b673b7..52a8486a 100755 --- a/scripts/build_docs.sh +++ b/scripts/build_docs.sh @@ -1,14 +1,6 @@ #!/bin/bash -<<<<<<< HEAD GITHUB=$1 TAG=$2 python3 scripts/dump_converters.py --github=$GITHUB --tag=$TAG > docs/converters.md -======= -TAG=$1 - -# python3 scripts/dump_converters.py --tag=$TAG > docs/converters.md - -mike deploy $TAG ->>>>>>> 4933faf05cd9e5d9aef2b23e7fe692bfda5fbc06 diff --git a/scripts/test_docs.sh b/scripts/test_docs.sh index 95343932..aef325f9 100755 --- a/scripts/test_docs.sh +++ b/scripts/test_docs.sh @@ -1,10 +1,3 @@ #!/bin/bash -TAG=$1 - -<<<<<<< HEAD -mkdocs serve --dev-addr=0.0.0.0:8000 -======= -mike set-default $TAG -mike serve --dev-addr=0.0.0.0:8000 ->>>>>>> 4933faf05cd9e5d9aef2b23e7fe692bfda5fbc06 +mkdocs serve --dev-addr=0.0.0.0:8000 \ No newline at end of file From 5aae5762bfb7e0acfc1328df7a3091877e26ac99 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 3 Sep 2020 00:24:06 +0000 Subject: [PATCH 321/355] mkdocs --- mkdocs.yml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/mkdocs.yml b/mkdocs.yml index 64c1116b..a4b8fb07 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -35,12 +35,7 @@ nav: - Getting Started: getting_started.md - Usage: - Basic Usage: usage/basic_usage.md -# - Reduced Precision: usage/reduced_precision.md -# - Batch Size: usage/batch_size.md -# - Cpp Usage: usage/cpp_usage.md -# - Custom Converter: usage/custom_converter.md -# - Profiling: usage/profiling.md -# - Debugging: usage/debugging.md + - Custom Converter: usage/custom_converter.md - Converters: converters.md - Benchmarks: - Jetson Nano: benchmarks/jetson_nano.md From 86d9c3aa6c3e21dc5b0071fcb6cc828f4cf0b6a4 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Thu, 3 Sep 2020 00:25:28 +0000 Subject: [PATCH 322/355] docs --- docs/usage/batch_size.md | 1 - docs/usage/cpp_usage.md | 2 -- docs/usage/debugging.md | 1 - docs/usage/profiling.md | 1 - docs/usage/reduced_precision.md | 1 - mkdocs.yml | 1 - 6 files changed, 7 deletions(-) delete mode 100644 docs/usage/batch_size.md delete mode 100644 docs/usage/cpp_usage.md delete mode 100644 docs/usage/debugging.md delete mode 100644 docs/usage/profiling.md delete mode 100644 docs/usage/reduced_precision.md diff --git a/docs/usage/batch_size.md b/docs/usage/batch_size.md deleted file mode 100644 index fabdff82..00000000 --- a/docs/usage/batch_size.md +++ /dev/null @@ -1 +0,0 @@ -# Batch Size \ No newline at end of file diff --git a/docs/usage/cpp_usage.md b/docs/usage/cpp_usage.md deleted file mode 100644 index 892e28a6..00000000 --- a/docs/usage/cpp_usage.md +++ /dev/null @@ -1,2 +0,0 @@ -# Cpp Usage - diff --git a/docs/usage/debugging.md b/docs/usage/debugging.md deleted file mode 100644 index 94853297..00000000 --- a/docs/usage/debugging.md +++ /dev/null @@ -1 +0,0 @@ -# Debugging \ No newline at end of file diff --git a/docs/usage/profiling.md b/docs/usage/profiling.md deleted file mode 100644 index f6c15c4d..00000000 --- a/docs/usage/profiling.md +++ /dev/null @@ -1 +0,0 @@ -# Profiling \ No newline at end of file diff --git a/docs/usage/reduced_precision.md b/docs/usage/reduced_precision.md deleted file mode 100644 index 1b31594f..00000000 --- a/docs/usage/reduced_precision.md +++ /dev/null @@ -1 +0,0 @@ -# Reduced Precision \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index a4b8fb07..6c49574c 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -41,7 +41,6 @@ nav: - Jetson Nano: benchmarks/jetson_nano.md - Jetson Xavier: 
benchmarks/jetson_xavier.md
   - Contributing: CONTRIBUTING.md
-  - Changes: CHANGELOG.md
   - See Also: see_also.md
 
 extra_css:

From 67de2146017e073dec1033988b5384e467c5e251 Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Thu, 3 Sep 2020 00:31:06 +0000
Subject: [PATCH 323/355] contrib plain md

---
 CONTRIBUTING.md | 40 +++++++++++++++++++---------------------
 1 file changed, 19 insertions(+), 21 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 92ca8891..6479c3a6 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -78,29 +78,27 @@ To view the docs, simply call
 ./scripts/test_docs.sh
 ```
 
-And then navigate to ``<ip_address>:8000``.
+And then navigate to ``https://<ip_address>:8000``.
 
-!!! note
-
-    This will not include dynamically generated documentation pages like the converters page.
-    These contain cross reference links to the GitHub source code.  If you want to test these
-    you can call
-
-    ```bash
-    ./scripts/build_docs.sh <github_url> <tag>
-    ```
-
-    Pointing to the public reflection
-    of your local repository.  For example, if we're working off the upstream master branch, we
-    would call
+Please note, this will not include dynamically generated documentation pages like the converters page.
+These contain cross reference links to the GitHub source code.  If you want to test these
+you can call
 
+```bash
+./scripts/build_docs.sh <github_url> <tag>
+```
+
+Pointing to the public reflection
+of your local repository.  For example, if we're working off the upstream master branch, we
+would call
+
+```bash
+./scripts/build_docs.sh https://github.com/NVIDIA-AI-IOT/torch2trt master
+```
 
-    ```bash
-    ./scripts/build_docs.sh https://github.com/NVIDIA-AI-IOT/torch2trt master
-    ```
+If your changes are pushed to your fork, you would do
 
-    If your changes are pushed to your fork, you would do
-
-    ```bash
-    ./scripts/build_docs.sh https://github.com/<your_github_username>/torch2trt my_branch
-    ```
+```bash
+./scripts/build_docs.sh https://github.com/<your_github_username>/torch2trt my_branch
+```

From 6c0aec73266b32e30f7151380517b7e297850edf Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Thu, 3 Sep 2020 00:36:08 +0000
Subject: [PATCH 324/355] basic usage

---
 docs/usage/basic_usage.md | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/docs/usage/basic_usage.md b/docs/usage/basic_usage.md
index 674ea3ee..f4787656 100644
--- a/docs/usage/basic_usage.md
+++ b/docs/usage/basic_usage.md
@@ -1,8 +1,10 @@
 # Basic Usage
 
-Below are some usage examples; for more, check out the [usage](usage) guide.
+This page demonstrates basic torch2trt usage.
 
-## Convert
+## Conversion
+
+You can easily convert a PyTorch module by calling ``torch2trt``, passing example data as input.  For example, to convert ``alexnet`` we call
 
 ```python
 import torch
@@ -19,7 +21,7 @@ x = torch.ones((1, 3, 224, 224)).cuda()
 model_trt = torch2trt(model, [x])
 ```
 
+!!! note
+
+    Currently with torch2trt, once the model is converted, you must use the same input shapes during
+    execution.  The exception is
+    the batch size, which can vary up to the value specified by the ``max_batch_size`` parameter.
+
-## Execute
+## Execution
 
-We can execute the returned ``TRTModule`` just like the original PyTorch model
+We can execute the returned ``TRTModule`` just like the original PyTorch model.  Here we
+execute the model and print the maximum absolute error.
 
 ```python
 y = model(x)
@@ -31,7 +40,7 @@ y_trt = model_trt(x)
 print(torch.max(torch.abs(y - y_trt)))
 ```
 
-## Save and load
+## Saving and loading
 
 We can save the model as a ``state_dict``.
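The note introduced above about fixed input shapes deserves a concrete illustration. The following is a minimal sketch (not part of the patches) of how ``max_batch_size`` interacts with execution; the value ``8`` is purely illustrative:

```python
import torch
from torch2trt import torch2trt
from torchvision.models.alexnet import alexnet

model = alexnet(pretrained=True).eval().cuda()

# convert with a batch-1 example input, but allow batches of up to 8 at runtime
x = torch.ones((1, 3, 224, 224)).cuda()
model_trt = torch2trt(model, [x], max_batch_size=8)

# batch sizes from 1 to 8 now work; the per-sample shape must stay (3, 224, 224)
y_trt = model_trt(torch.ones((4, 3, 224, 224)).cuda())
```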

From bd2cd11bb9b51ef880345dfdb79f49d5f9875bdc Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Thu, 3 Sep 2020 00:37:18 +0000
Subject: [PATCH 325/355] issues pathfix

---
 docs/index.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/index.md b/docs/index.md
index 91f8eca8..6114cdc9 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -9,4 +9,4 @@ TensorRT Python API.  The converter is
 
 * Easy to extend - Write your own layer converter in Python and register it with ``@tensorrt_converter``
 
-If you find an issue, please [let us know](https://github.com/NVIDIA-AI-IOT/torch2trt)! \ No newline at end of file
+If you find an issue, please [let us know](https://github.com/NVIDIA-AI-IOT/torch2trt/issues)! \ No newline at end of file

From 50169efd63e3b35abb5ca2d4aa75381321d9b194 Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Thu, 3 Sep 2020 00:38:26 +0000
Subject: [PATCH 326/355] broken link

---
 docs/see_also.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/see_also.md b/docs/see_also.md
index 064da014..5217f1c5 100644
--- a/docs/see_also.md
+++ b/docs/see_also.md
@@ -48,7 +48,7 @@ converter is typically strong in this regards, since the parser is distributed w
 
 In case none of the converters satisfy your use case, you may find it necessary to
 adapt the converter to fit your needs.  This is very intuitive with torch2trt,
-since it is done inline with Python, and there are many [examples](converters) to reference.  If you know
+since it is done inline with Python, and there are many [examples](converters.md) to reference.  If you know
 how the original PyTorch method works, and have the TensorRT Python API on hand, it is relatively
 straightforward to adapt torch2trt to your needs.  The extensibility is often helpful when you want to implement
 a converter that is specific to the context the layer appears in.

From 62afaf1e246d278d1ee09452e3ae94de07fa45be Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Thu, 3 Sep 2020 00:46:56 +0000
Subject: [PATCH 327/355] release build docs

---
 scripts/release_build_docs.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/release_build_docs.sh b/scripts/release_build_docs.sh
index a1883671..0edc28d8 100755
--- a/scripts/release_build_docs.sh
+++ b/scripts/release_build_docs.sh
@@ -2,6 +2,6 @@
 
 TAG=$1
 
-# python3 scripts/dump_converters.py --tag=$TAG > docs/converters.md
+python3 scripts/dump_converters.py --tag=$TAG > docs/converters.md
 
 mike deploy $TAG \ No newline at end of file

From 3efb89d0149dad8a54fbcc96ffff479f31a0d521 Mon Sep 17 00:00:00 2001
From: John
Date: Wed, 2 Sep 2020 22:06:12 -0700
Subject: [PATCH 328/355] Reduce precision docs (#400)

* reduced precision docs

* support matrix

* docs
---
 CHANGELOG.md                    |   8 +-
 README.md                       |   2 +
 docs/images/check.svg           |   1 +
 docs/usage/reduced_precision.md | 152 ++++++++++++++++++++++++++++++++
 mkdocs.yml                      |   1 +
 5 files changed, 163 insertions(+), 1 deletion(-)
 create mode 100644 docs/images/check.svg
 create mode 100644 docs/usage/reduced_precision.md

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 252d4483..21b61710 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1 +1,7 @@
-# Changes \ No newline at end of file
+# Changes
+
+## [Master]
+
+### Added
+
+- Added reduced precision documentation page \ No newline at end of file
diff --git a/README.md b/README.md
index 4c5d0fc6..7f73c56a 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,7 @@
 # torch2trt
 
+
+
 torch2trt is a PyTorch to TensorRT converter which utilizes the
 TensorRT Python API.
The converter is

diff --git a/docs/images/check.svg b/docs/images/check.svg
new file mode 100644
index 00000000..cf59f02c
--- /dev/null
+++ b/docs/images/check.svg
@@ -0,0 +1 @@
+ \ No newline at end of file
diff --git a/docs/usage/reduced_precision.md b/docs/usage/reduced_precision.md
new file mode 100644
index 00000000..02b4bd35
--- /dev/null
+++ b/docs/usage/reduced_precision.md
@@ -0,0 +1,152 @@
+# Reduced Precision
+
+For certain platforms, reduced precision can result in substantial improvements in throughput,
+often with little impact on model accuracy.
+
+## Support Matrix
+
+Below is a table of layer precision support for various NVIDIA platforms.
+
+| Platform | FP16 | INT8 |
+|----------|------|------|
+| Jetson Nano | ![X](../images/check.svg) | |
+| Jetson TX2 | ![X](../images/check.svg) | ![X](../images/check.svg) |
+| Jetson Xavier NX | ![X](../images/check.svg) | ![X](../images/check.svg) |
+| Jetson AGX Xavier | ![X](../images/check.svg) | ![X](../images/check.svg) |
+
+!!! note
+
+    If the platform you're using is missing from this table or you spot anything incorrect
+    please [let us know](https://github.com/NVIDIA-AI-IOT/torch2trt).
+
+## FP16 Precision
+
+To enable support for fp16 precision with TensorRT, torch2trt exposes the ``fp16_mode`` parameter.
+Converting a model with ``fp16_mode=True`` allows the TensorRT optimizer to select layers with fp16
+precision.
+
+
+```python
+model_trt = torch2trt(model, [data], fp16_mode=True)
+```
+
+!!! note
+
+    When ``fp16_mode=True``, this does not necessarily mean that TensorRT will select FP16 layers.
+    The optimizer attempts to automatically select tactics which result in the best performance.
+
+## INT8 Precision
+
+torch2trt also supports int8 precision with TensorRT with the ``int8_mode`` parameter.  Unlike fp16 and fp32 precision, switching
+to int8 precision often requires calibration to avoid a significant drop in accuracy.
+
+### Input Data Calibration
+
+By default
+torch2trt will calibrate using the input data provided.  For example, if you wanted
+to calibrate on a set of 64 random normal images you could do
+
+```python
+data = torch.randn(64, 3, 224, 224).cuda()
+
+model_trt = torch2trt(model, [data], int8_mode=True)
+```
+
+### Dataset Calibration
+
+In many instances, you may want to calibrate on more data than fits in memory.  For this reason,
+torch2trt exposes the ``int8_calib_dataset`` parameter.  This parameter takes an input
+dataset that is used for calibration.  If this parameter is specified, the input data is
+ignored during calibration.  You create an input dataset by defining
+a class which implements the ``__len__`` and ``__getitem__`` methods.
+
+* The ``__len__`` method should return the number of calibration samples
+* The ``__getitem__`` method must return a single calibration sample.  This is a list of input tensors to the model.  Each tensor should match the shape
+you provide to the ``inputs`` parameter when calling ``torch2trt``.
+
+For example, say you trained an image classification network using the PyTorch [``ImageFolder``](https://pytorch.org/docs/stable/torchvision/datasets.html#imagefolder) dataset.
+You could wrap this dataset for calibration, by defining a new dataset which returns only the images without labels in list format.
+
+```python
+from torchvision.datasets import ImageFolder
+from torchvision.transforms import ToTensor, Compose, Normalize
+
+
+class ImageFolderCalibDataset():
+
+    def __init__(self, root):
+        self.dataset = ImageFolder(
+            root=root,
+            transform=Compose([
+                transforms.Resize((224, 224)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
+            ])
+        )
+
+    def __len__(self):
+        return len(self.dataset)
+
+    def __getitem__(self, idx):
+        image, _ = self.dataset[idx]
+        image = image[None, ...]  # add batch dimension
+        return [image]
+```
+
+You would then provide this calibration dataset to torch2trt as follows
+
+```python
+dataset = ImageFolderCalibDataset('images')
+
+model_trt = torch2trt(model, [data], int8_calib_dataset=dataset)
+```
+
+### Calibration Algorithm
+
+To override the default calibration algorithm that torch2trt uses, you can set the ``int8_calib_algorithm``
+to the [``tensorrt.CalibrationAlgoType``](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/infer/Int8/Calibrator.html#iint8calibrator)
+that you wish to use.  For example, to use the minmax calibration algoirthm you would do
+
+```python
+import tensorrt as trt
+
+model_trt = torch2trt(model, [data], int8_mode=True, int8_calib_algorithm=trt.CalibrationAlgoType.MINMAX_CALIBRATION)
+```
+
+### Calibration Batch Size
+
+During calibration, torch2trt pulls data in batches for the TensorRT calibrator.  In some instances
+[developers have found](https://github.com/NVIDIA-AI-IOT/torch2trt/pull/398) that the calibration batch size can impact the calibrated model accuracy.  To set the calibration batch size, you can set the ``int8_calib_batch_size``
+parameter.  For example, to use a calibration batch size of 32 you could do
+
+```python
+model_trt = torch2trt(model, [data], int8_mode=True, int8_calib_batch_size=32)
+```
+
+## Binding Data Types
+
+The data type of input and output bindings in TensorRT are determined by the original
+PyTorch module input and output data types.
+This does not directly impact whether the TensorRT optimizer will internally use fp16 or int8 precision.
+
+For example, to create a model with half precision bindings, you would do the following
+
+```python
+model = model.float()
+data = data.float()
+
+model_trt = torch2trt(model, [data], fp16_mode=True)
+```
+
+In this instance, the optimizer may choose to use fp16 precision layers internally, but the
+input and output data types are fp32.  To use fp16 precision input and output bindings you would do
+
+```python
+model = model.half()
+data = data.half()
+
+model_trt = torch2trt(model, [data], fp16_mode=True)
+```
+
+Now, the input and output bindings of the model are half precision, and internally the optimizer may
+choose to select fp16 layers as well.
\ No newline at end of file
diff --git a/mkdocs.yml b/mkdocs.yml
index 6c49574c..fcc8c127 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -35,6 +35,7 @@ nav:
   - Getting Started: getting_started.md
   - Usage:
     - Basic Usage: usage/basic_usage.md
+    - Reduced Precision: usage/reduced_precision.md
    - Custom Converter: usage/custom_converter.md
   - Converters: converters.md
   - Benchmarks:

From 41417305e1afb4443396173396b45e27351563a5 Mon Sep 17 00:00:00 2001
From: John
Date: Thu, 3 Sep 2020 15:51:35 -0700
Subject: [PATCH 329/355] Update reduced_precision.md (#402)

---
 docs/usage/reduced_precision.md | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/docs/usage/reduced_precision.md b/docs/usage/reduced_precision.md
index 02b4bd35..1d288525 100644
--- a/docs/usage/reduced_precision.md
+++ b/docs/usage/reduced_precision.md
@@ -69,7 +69,7 @@ You could wrap this dataset for calibration, by defining a new dataset which ret
 
 ```python
 from torchvision.datasets import ImageFolder
-from torchvision.transforms import ToTensor, Compose, Normalize
+from torchvision.transforms import ToTensor, Compose, Normalize, Resize
 
 
 class ImageFolderCalibDataset():
@@ -78,9 +78,9 @@ class ImageFolderCalibDataset():
         self.dataset = ImageFolder(
             root=root,
             transform=Compose([
-                transforms.Resize((224, 224)),
-                transforms.ToTensor(),
-                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
+                Resize((224, 224)),
+                ToTensor(),
+                Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
             ])
         )
 
@@ -105,7 +105,7 @@ model_trt = torch2trt(model, [data], int8_calib_dataset=dataset)
 
 To override the default calibration algorithm that torch2trt uses, you can set the ``int8_calib_algorithm``
 to the [``tensorrt.CalibrationAlgoType``](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/infer/Int8/Calibrator.html#iint8calibrator)
-that you wish to use.  For example, to use the minmax calibration algoirthm you would do
+that you wish to use.  For example, to use the minmax calibration algorithm you would do
 
@@ -129,7 +129,7 @@ The data type of input and output bindings in TensorRT are determined by the ori
 PyTorch module input and output data types.
 This does not directly impact whether the TensorRT optimizer will internally use fp16 or int8 precision.
 
-For example, to create a model with half precision bindings, you would do the following
+For example, to create a model with fp32 precision bindings, you would do the following
 
 ```python
 model = model.float()
@@ -149,4 +149,4 @@ model_trt = torch2trt(model, [data], fp16_mode=True)
 
 Now, the input and output bindings of the model are half precision, and internally the optimizer may
-choose to select fp16 layers as well. \ No newline at end of file
+choose to select fp16 layers as well.
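Read end to end, the reduced-precision page added and then fixed above suggests the following minimal int8 sketch; ``ImageFolderCalibDataset`` is the wrapper class defined earlier on that page, and ``'images'`` stands in for a real calibration folder:

```python
import torch
from torch2trt import torch2trt
from torchvision.models.resnet import resnet18

model = resnet18(pretrained=True).eval().cuda()

# example input: fixes the binding shapes (and, being fp32, the binding dtypes)
data = torch.randn(1, 3, 224, 224).cuda()

# calibration samples come from the dataset, so `data` is ignored for calibration
dataset = ImageFolderCalibDataset('images')

model_trt = torch2trt(
    model,
    [data],
    int8_mode=True,
    int8_calib_dataset=dataset,
    int8_calib_batch_size=32,
)
```

All three keyword arguments (``int8_mode``, ``int8_calib_dataset``, ``int8_calib_batch_size``) are the ones documented on the page above.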
From 756377002708121d43a3e66ba72f56af65696402 Mon Sep 17 00:00:00 2001 From: John Date: Thu, 3 Sep 2020 15:53:13 -0700 Subject: [PATCH 330/355] Update mkdocs.yml --- mkdocs.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mkdocs.yml b/mkdocs.yml index fcc8c127..27e4d2ed 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -12,7 +12,7 @@ plugins: use_directory_urls: False -edit_uri: blob/master +edit_uri: blob/master/docs markdown_extensions: - pymdownx.tabbed - pymdownx.keys @@ -52,4 +52,4 @@ extra_javascript: google_analytics: - UA-135919510-3 - auto - \ No newline at end of file + From ea835dc368903837a72e8393947e82c52a64488a Mon Sep 17 00:00:00 2001 From: John Date: Mon, 14 Sep 2020 19:35:57 -0700 Subject: [PATCH 331/355] remove ndim referneces to support older pytorch versions (#415) --- CHANGELOG.md | 3 ++- torch2trt/converters/ReLU6.py | 4 ++-- torch2trt/converters/add.py | 4 ++-- torch2trt/converters/cat.py | 4 ++-- torch2trt/converters/compare.py | 2 +- torch2trt/converters/div.py | 6 +++--- torch2trt/converters/getitem.py | 4 ++-- torch2trt/converters/instance_norm.py | 4 ++-- torch2trt/converters/max.py | 6 +++--- torch2trt/converters/min.py | 6 +++--- torch2trt/converters/mul.py | 4 ++-- torch2trt/converters/pow.py | 6 +++--- torch2trt/converters/prod.py | 4 ++-- torch2trt/converters/softmax.py | 4 ++-- torch2trt/converters/sub.py | 6 +++--- torch2trt/converters/sum.py | 4 ++-- torch2trt/torch2trt.py | 2 +- 17 files changed, 37 insertions(+), 36 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 21b61710..e5c0007b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,4 +4,5 @@ ### Added -- Added reduced precision documentation page \ No newline at end of file +- Replaced Tensor.ndim references with len(tensor.shape) to support older pytorch versions +- Added reduced precision documentation page diff --git a/torch2trt/converters/ReLU6.py b/torch2trt/converters/ReLU6.py index 8b5af6a1..c452693c 100644 --- a/torch2trt/converters/ReLU6.py +++ b/torch2trt/converters/ReLU6.py @@ -8,7 +8,7 @@ def convert_ReLU6(ctx): output = ctx.method_return input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input, 6]) - input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], output.ndim - 1) + input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1) layer = ctx.network.add_activation( input=input_a_trt, type=trt.ActivationType.RELU) @@ -20,4 +20,4 @@ def convert_ReLU6(ctx): @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5)]) def test_relu6_basic(): - return torch.nn.ReLU6() \ No newline at end of file + return torch.nn.ReLU6() diff --git a/torch2trt/converters/add.py b/torch2trt/converters/add.py index 62c93e7e..99118a21 100644 --- a/torch2trt/converters/add.py +++ b/torch2trt/converters/add.py @@ -11,7 +11,7 @@ def convert_add(ctx): input_b = ctx.method_args[1] output = ctx.method_return input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) - input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], output.ndim - 1) + input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1) layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.SUM) output._trt = layer.get_output(0) @@ -106,4 +106,4 @@ def forward(self, x): @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10, 10)]) def test_add_constant_batch(): - 
return AddConstantBatch() \ No newline at end of file + return AddConstantBatch() diff --git a/torch2trt/converters/cat.py b/torch2trt/converters/cat.py index 10d85c34..23f039e8 100644 --- a/torch2trt/converters/cat.py +++ b/torch2trt/converters/cat.py @@ -9,7 +9,7 @@ def convert_cat(ctx): output = ctx.method_return trt_inputs = add_missing_trt_tensors(ctx.network, inputs) - trt_inputs = broadcast_trt_tensors(ctx.network, trt_inputs, output.ndim - 1) + trt_inputs = broadcast_trt_tensors(ctx.network, trt_inputs, len(output.shape) - 1) layer = ctx.network.add_concatenation(inputs=trt_inputs) layer.axis = dim - 1 @@ -25,4 +25,4 @@ def forward(self, *x): @add_module_test(torch.float32, torch.device('cuda'), [(1, 4, 4), (1, 3, 4), (1, 17, 4)]) def test_Cat_basic(): - return Cat(1) \ No newline at end of file + return Cat(1) diff --git a/torch2trt/converters/compare.py b/torch2trt/converters/compare.py index b16f2f0f..fc2024ed 100644 --- a/torch2trt/converters/compare.py +++ b/torch2trt/converters/compare.py @@ -6,7 +6,7 @@ def convert_elementwise(ctx, op): input_b = ctx.method_args[1] output = ctx.method_return input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) - input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], output.ndim - 1) + input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1) layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, op) output._trt = layer.get_output(0) diff --git a/torch2trt/converters/div.py b/torch2trt/converters/div.py index ba2e4f94..f7e95642 100644 --- a/torch2trt/converters/div.py +++ b/torch2trt/converters/div.py @@ -12,7 +12,7 @@ def convert_div(ctx): input_b = ctx.method_args[1] output = ctx.method_return input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) - input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], output.ndim - 1) + input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1) layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.DIV) output._trt = layer.get_output(0) @@ -24,7 +24,7 @@ def convert_rdiv(ctx): input_b = ctx.method_args[0] output = ctx.method_return input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) - input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], output.ndim - 1) + input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1) layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.DIV) output._trt = layer.get_output(0) @@ -120,4 +120,4 @@ def forward(self, x): @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10, 10)]) def test_div_constant_batch(): - return DivConstantBatch() \ No newline at end of file + return DivConstantBatch() diff --git a/torch2trt/converters/getitem.py b/torch2trt/converters/getitem.py index 88877dea..92203d50 100644 --- a/torch2trt/converters/getitem.py +++ b/torch2trt/converters/getitem.py @@ -31,7 +31,7 @@ def convert_tensor_getitem(ctx): # Step 1 - Replace ellipsis with expanded slices - num_ellipsis = input.ndim - num_slice_types(slices) + num_ellipsis = len(input.shape) - num_slice_types(slices) new_slices = [] for s in slices: @@ -152,4 +152,4 @@ def test_tensor_getitem_2d_append_2dim(): @add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 4, 
3)]) def test_tensor_getitem_2d_weird_combo(): - return LambdaModule(lambda x: x[:, 0:3:4, None, None, 1, ...]) \ No newline at end of file + return LambdaModule(lambda x: x[:, 0:3:4, None, None, 1, ...]) diff --git a/torch2trt/converters/instance_norm.py b/torch2trt/converters/instance_norm.py index 0e8c9a13..5785fb20 100644 --- a/torch2trt/converters/instance_norm.py +++ b/torch2trt/converters/instance_norm.py @@ -58,7 +58,7 @@ def convert_instance_norm(ctx): eps_np = np.array([eps], dtype=np.float32) keep_dims = True - reduce_axes = torch_dim_to_trt_axes(tuple(range(2, input.ndim))) + reduce_axes = torch_dim_to_trt_axes(tuple(range(2, len(input.shape)))) # compute mean over spatial mean_trt = ctx.network.add_reduce(input._trt, trt.ReduceOperation.AVG, reduce_axes, keep_dims).get_output(0) @@ -147,4 +147,4 @@ def test_instance_norm_2d_dynamic_affine(): @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3, 3)]) def test_instance_norm_3d_dynamic_affine(): - return torch.nn.InstanceNorm3d(10, affine=True, track_running_stats=False) \ No newline at end of file + return torch.nn.InstanceNorm3d(10, affine=True, track_running_stats=False) diff --git a/torch2trt/converters/max.py b/torch2trt/converters/max.py index b7bbe97f..ac835638 100644 --- a/torch2trt/converters/max.py +++ b/torch2trt/converters/max.py @@ -8,14 +8,14 @@ def __convert_max_elementwise(ctx): input_b = ctx.method_args[1] output = ctx.method_return input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) - input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], output.ndim - 1) + input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1) layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.MAX) output._trt = layer.get_output(0) def __convert_max_reduce(ctx): input = ctx.method_args[0] - dim = get_arg(ctx, 'dim', pos=1, default=tuple(range(1, input.ndim))) + dim = get_arg(ctx, 'dim', pos=1, default=tuple(range(1, len(input.shape)))) keepdim = get_arg(ctx, 'keepdim', pos=2, default=False) input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output_val = ctx.method_return[0] @@ -59,4 +59,4 @@ def forward(self, x, y): @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3), (1,)]) # broadcast @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3), (1, 3, 3)]) # broadcast def test_max_elementwise(): - return MaxElementwise() \ No newline at end of file + return MaxElementwise() diff --git a/torch2trt/converters/min.py b/torch2trt/converters/min.py index f58d45dc..e8f891ad 100644 --- a/torch2trt/converters/min.py +++ b/torch2trt/converters/min.py @@ -8,14 +8,14 @@ def __convert_min_elementwise(ctx): input_b = ctx.method_args[1] output = ctx.method_return input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) - input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], output.ndim - 1) + input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1) layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.MIN) output._trt = layer.get_output(0) def __convert_min_reduce(ctx): input = ctx.method_args[0] - dim = get_arg(ctx, 'dim', pos=1, default=tuple(range(1,input.ndim))) + dim = get_arg(ctx, 'dim', pos=1, default=tuple(range(1,len(input.shape)))) keepdim = get_arg(ctx, 'keepdim', pos=2, default=False) 
input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output_val = ctx.method_return[0] @@ -59,4 +59,4 @@ def forward(self, x, y): @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3), (1,)]) # broadcast @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3), (1, 3, 3)]) # broadcast def test_min_elementwise(): - return MinElementwise() \ No newline at end of file + return MinElementwise() diff --git a/torch2trt/converters/mul.py b/torch2trt/converters/mul.py index 8e527817..eefd744c 100644 --- a/torch2trt/converters/mul.py +++ b/torch2trt/converters/mul.py @@ -11,7 +11,7 @@ def convert_mul(ctx): input_b = ctx.method_args[1] output = ctx.method_return input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) - input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], output.ndim - 1) + input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1) layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.PROD) output._trt = layer.get_output(0) @@ -105,4 +105,4 @@ def forward(self, x): @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10, 10)]) def test_mul_constant_batch(): - return MulConstantBatch() \ No newline at end of file + return MulConstantBatch() diff --git a/torch2trt/converters/pow.py b/torch2trt/converters/pow.py index 3949b615..357cdb77 100644 --- a/torch2trt/converters/pow.py +++ b/torch2trt/converters/pow.py @@ -10,7 +10,7 @@ def convert_pow(ctx): input_b = ctx.method_args[1] output = ctx.method_return input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) - input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], output.ndim - 1) + input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1) layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.POW) output._trt = layer.get_output(0) @@ -21,7 +21,7 @@ def convert_pow(ctx): input_b = ctx.method_args[0] # flipped for rpow output = ctx.method_return input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) - input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], output.ndim - 1) + input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1) layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.POW) output._trt = layer.get_output(0) @@ -89,4 +89,4 @@ def forward(self, x): @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) def test_rpow_float(): - return RpowFloat() \ No newline at end of file + return RpowFloat() diff --git a/torch2trt/converters/prod.py b/torch2trt/converters/prod.py index 7f5afbc6..fda1d3e6 100644 --- a/torch2trt/converters/prod.py +++ b/torch2trt/converters/prod.py @@ -7,7 +7,7 @@ @tensorrt_converter('torch.Tensor.prod') def convert_prod(ctx): input = ctx.method_args[0] - dim = get_arg(ctx, 'dim', pos=1, default=tuple(range(1, input.ndim))) + dim = get_arg(ctx, 'dim', pos=1, default=tuple(range(1, len(input.shape)))) keepdim = get_arg(ctx, 'keepdim', pos=2, default=False) input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return @@ -35,4 +35,4 @@ def test_prod_reduce_dim22(): @add_module_test(torch.float32, torch.device('cuda'), [(1, 3)]) @add_module_test(torch.float32, torch.device('cuda'), 
[(1, 3, 3)]) def test_prod_reduce_dim1_keepdim(): - return UnaryModule(lambda x: torch.prod(x, 1, keepdim=True)) \ No newline at end of file + return UnaryModule(lambda x: torch.prod(x, 1, keepdim=True)) diff --git a/torch2trt/converters/softmax.py b/torch2trt/converters/softmax.py index 2b98f67b..39cd627d 100644 --- a/torch2trt/converters/softmax.py +++ b/torch2trt/converters/softmax.py @@ -18,7 +18,7 @@ def convert_softmax(ctx): # import pdb # pdb.set_trace() if dim < 0: - dim = input.ndim + dim + dim = len(input.shape) + dim axes = 1 << (dim - 1) @@ -47,4 +47,4 @@ def test_softmax_module_neg1(): @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) def test_softmax_module_dim_neg2(): - return torch.nn.Softmax(-2) \ No newline at end of file + return torch.nn.Softmax(-2) diff --git a/torch2trt/converters/sub.py b/torch2trt/converters/sub.py index d110c654..1ae2a124 100644 --- a/torch2trt/converters/sub.py +++ b/torch2trt/converters/sub.py @@ -10,7 +10,7 @@ def convert_sub(ctx): input_b = ctx.method_args[1] output = ctx.method_return input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) - input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], output.ndim - 1) + input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1) layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.SUB) output._trt = layer.get_output(0) @@ -21,7 +21,7 @@ def convert_sub(ctx): input_b = ctx.method_args[0] # flipped for rsub output = ctx.method_return input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) - input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], output.ndim - 1) + input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1) layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.SUB) output._trt = layer.get_output(0) @@ -115,4 +115,4 @@ def forward(self, x): @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10, 10)]) def test_sub_constant_batch(): - return SubConstantBatch() \ No newline at end of file + return SubConstantBatch() diff --git a/torch2trt/converters/sum.py b/torch2trt/converters/sum.py index 7b975c71..272cd433 100644 --- a/torch2trt/converters/sum.py +++ b/torch2trt/converters/sum.py @@ -7,7 +7,7 @@ @tensorrt_converter('torch.Tensor.sum') def convert_sum(ctx): input = ctx.method_args[0] - dim = get_arg(ctx, 'dim', pos=1, default=tuple(range(1, input.ndim))) + dim = get_arg(ctx, 'dim', pos=1, default=tuple(range(1, len(input.shape)))) keepdim = get_arg(ctx, 'keepdim', pos=2, default=False) input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return @@ -49,4 +49,4 @@ def forward(self, x): @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3)]) @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 23, 23)]) def test_disparity_reg(): - return DisparityRegression(10) \ No newline at end of file + return DisparityRegression(10) diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index 9282e5e4..3ade89ad 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -140,7 +140,7 @@ def add_missing_trt_tensors(network, tensors): # remove all preceding ones, these can be re-inserted later when broadcasting num_preceding_ones = 0 - for j in range(t.ndim): + for j in range(len(t.shape)): if int(t.shape[j]) == 1: 
num_preceding_ones += 1 else: From b0cc8e77a0fbd61e96b971a66bbc11326f77c6b5 Mon Sep 17 00:00:00 2001 From: Alex Sergeev Date: Fri, 18 Sep 2020 14:52:02 -0700 Subject: [PATCH 332/355] Assert that inputs are contiguous (#418) * Assert that inputs are contiguous * Turn non-contiguous tensors into contiguous * Add unit test * Fix tabs --- torch2trt/tests/test_contiguous.py | 20 ++++++++++++++++++++ torch2trt/torch2trt.py | 2 +- 2 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 torch2trt/tests/test_contiguous.py diff --git a/torch2trt/tests/test_contiguous.py b/torch2trt/tests/test_contiguous.py new file mode 100644 index 00000000..f2047072 --- /dev/null +++ b/torch2trt/tests/test_contiguous.py @@ -0,0 +1,20 @@ +import torch +from torch2trt import torch2trt + + +def test_contiguous(): + net = torch.nn.Conv2d(3, 10, kernel_size=3) + net.eval().cuda() + + test_tensor = torch.randn((1, 25, 25, 3)).cuda().permute((0, 3, 1, 2)) + + with torch.no_grad(): + test_out = net(test_tensor) + + with torch.no_grad(): + trt_net = torch2trt(net, [test_tensor]) + test_trt_out = trt_net(test_tensor) + + delta = (test_out.contiguous() - test_trt_out.contiguous()).abs().sum() + assert delta < 1e-3, f"Delta: {delta}" + diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index 3ade89ad..7e8d3278 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -427,7 +427,7 @@ def forward(self, *inputs): for i, input_name in enumerate(self.input_names): idx = self.engine.get_binding_index(input_name) - bindings[idx] = inputs[i].data_ptr() + bindings[idx] = inputs[i].contiguous().data_ptr() self.context.execute_async( batch_size, bindings, torch.cuda.current_stream().cuda_stream From a9a6a532149f812a588f06258a43c910e8d9efce Mon Sep 17 00:00:00 2001 From: John Date: Mon, 19 Oct 2020 12:07:33 -0400 Subject: [PATCH 333/355] Add layer names (#432) * Auto-generate custom layer names * fixed layer name count key * updated changelog for adding layer names Co-authored-by: Alex Sergeev --- CHANGELOG.md | 1 + torch2trt/torch2trt.py | 38 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e5c0007b..18dc5ebb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,5 +4,6 @@ ### Added +- Added names for TensorRT layers - Replaced Tensor.ndim references with len(tensor.shape) to support older pytorch versions - Added reduced precision documentation page diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index 7e8d3278..7051e629 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -3,6 +3,7 @@ from copy import copy import numpy as np import io +from collections import defaultdict from .calibration import ( TensorBatchDataset, @@ -326,10 +327,43 @@ def default_input_names(num_inputs): def default_output_names(num_outputs): return ["output_%d" % i for i in range(num_outputs)] - + + +class LayerNamingNetworkWrapper(object): + def __init__(self, ctx, network): + self._ctx = ctx + self._network = network + self._layer_counts = defaultdict(lambda: 0) + + def _set_layer_name(self, layer): + def arg_str(arg): + if isinstance(arg, torch.Tensor): + return "tensor(shape=%s, dtype=%s)" % (str(list(arg.shape)), str(arg.dtype)) + return str(arg) + + self._layer_counts[layer.type.name] += 1 + args = [arg_str(arg) for arg in self._ctx.method_args] + kwargs = ["%s=%s" % (key, arg_str(arg)) for key, arg in self._ctx.method_kwargs.items()] + layer.name = "[%s #%d] %s(%s)" % (layer.type.name, 
self._layer_counts[layer.type.name], + self._ctx.method_str, ", ".join(args + kwargs)) + + def __getattr__(self, name): + attr = getattr(self._network, name) + if callable(attr): + def wrapper(*args, **kwargs): + ret = attr(*args, **kwargs) + if isinstance(ret, trt.ILayer): + self._set_layer_name(ret) + return ret + + return wrapper + else: + return attr + + class ConversionContext(object): def __init__(self, network, converters=CONVERTERS): - self.network = network + self.network = LayerNamingNetworkWrapper(self, network) self.lock = False self.method_args = None self.method_kwargs = None From d1fa6f9f20c6c4c57a9486680ab38c45d0d94ec3 Mon Sep 17 00:00:00 2001 From: John Date: Wed, 4 Nov 2020 13:45:36 -0500 Subject: [PATCH 334/355] Sandeepkumar skb groupnorm plugin (#437) * added plugin for GroupNorm Co-authored-by: sandeepkumar-skb --- CHANGELOG.md | 1 + build.py | 1 + setup.py | 5 +- torch2trt/converters/__init__.py | 1 + torch2trt/converters/group_norm.py | 48 +++++ torch2trt/plugins/group_norm.cpp | 296 +++++++++++++++++++++++++++++ torch2trt/plugins/interpolate.cpp | 15 +- torch2trt/plugins/plugins.cpp | 30 +++ 8 files changed, 380 insertions(+), 17 deletions(-) create mode 100644 torch2trt/converters/group_norm.py create mode 100644 torch2trt/plugins/group_norm.cpp create mode 100644 torch2trt/plugins/plugins.cpp diff --git a/CHANGELOG.md b/CHANGELOG.md index 18dc5ebb..1449ed5c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,5 +5,6 @@ ### Added - Added names for TensorRT layers +- Added GroupNorm plugin which internally uses PyTorch aten::group_norm - Replaced Tensor.ndim references with len(tensor.shape) to support older pytorch versions - Added reduced precision documentation page diff --git a/build.py b/build.py index fb3bb3c5..3bfcf1df 100644 --- a/build.py +++ b/build.py @@ -5,6 +5,7 @@ PLUGINS = [ 'interpolate', + 'group_norm', ] BASE_FOLDER = 'torch2trt/converters' diff --git a/setup.py b/setup.py index 75fa4237..68879132 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ def trt_lib_dir(): plugins_ext_module = CUDAExtension( name='plugins', sources=[ - 'torch2trt/plugins/interpolate.cpp' + 'torch2trt/plugins/plugins.cpp' ], include_dirs=[ trt_inc_dir() @@ -29,8 +29,7 @@ def trt_lib_dir(): 'cxx': ['-DUSE_DEPRECATED_INTLIST'] if torch.__version__ < "1.5" else [], 'nvcc': [] } -) - + ) if '--plugins' in sys.argv: ext_modules.append(plugins_ext_module) sys.argv.remove('--plugins') diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index 7e50ab3e..663710e0 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -32,6 +32,7 @@ from .identity import * from .instance_norm import * from .interpolate import * +from .group_norm import * from .max import * from .max_pool2d import * from .mean import * diff --git a/torch2trt/converters/group_norm.py b/torch2trt/converters/group_norm.py new file mode 100644 index 00000000..6b7c37c3 --- /dev/null +++ b/torch2trt/converters/group_norm.py @@ -0,0 +1,48 @@ +import torch.nn as nn +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + +def has_group_norm_plugin(): + try: + from torch2trt.plugins import GroupNormPlugin + return True + except: + return False + + +def get_group_norm_plugin(num_groups, weight, bias, eps): + from torch2trt.plugins import GroupNormPlugin + PLUGIN_NAME = 'group_norm' + registry = trt.get_plugin_registry() + creator = [c for c in registry.plugin_creator_list if c.name == PLUGIN_NAME and c.plugin_namespace == 
'torch2trt'][0]
+    torch2trt_plugin = GroupNormPlugin(num_groups=num_groups, weight=weight, bias=bias, eps=eps)
+    return creator.deserialize_plugin(PLUGIN_NAME, torch2trt_plugin.serializeToString())
+
+@tensorrt_converter('torch.nn.GroupNorm.forward', has_group_norm_plugin())
+def convert_group_norm_trt(ctx):
+    module = ctx.method_args[0]
+    input = ctx.method_args[1]
+    num_groups = module.num_groups
+    weight = module.weight
+    bias = module.bias
+    eps = module.eps
+    input_trt = add_missing_trt_tensors(ctx.network, [input])
+    output = ctx.method_return
+    plugin = get_group_norm_plugin(num_groups, weight, bias, eps)
+
+    layer = ctx.network.add_plugin_v2(input_trt, plugin)
+
+    output._trt = layer.get_output(0)
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], has_group_norm_plugin())
+def test_group_norm_trt_g2_fp32():
+    return torch.nn.GroupNorm(2, 10)
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 112, 112)], has_group_norm_plugin())
+def test_group_norm_trt_g2_eps_fp32():
+    return torch.nn.GroupNorm(2, 10, eps=1e-4)
+
+
diff --git a/torch2trt/plugins/group_norm.cpp b/torch2trt/plugins/group_norm.cpp
new file mode 100644
index 00000000..ccc7b51d
--- /dev/null
+++ b/torch2trt/plugins/group_norm.cpp
@@ -0,0 +1,296 @@
+#include <torch/extension.h>
+#include <torch/script.h>
+#include <iostream>
+#include <string>
+#include <sstream>
+#include <NvInfer.h>
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAEvent.h>
+#include <torch/torch.h>
+#include <cuda_runtime_api.h>
+
+using namespace nvinfer1;
+
+namespace torch2trt {
+
+class GroupNormPlugin : public IPluginV2 {
+private:
+  // configured by class
+  at::TensorOptions tensor_options;
+  std::vector<int64_t> input_sizes;
+  std::vector<int64_t> output_sizes;
+  DataType dtype;
+
+  // group norm parameters, configured by user
+  int64_t num_groups;
+  at::Tensor weight;
+  at::Tensor bias;
+  double eps;
+
+public:
+
+  // create from arguments
+  GroupNormPlugin(int64_t num_groups, at::Tensor weight, at::Tensor bias, double eps) :
+    num_groups{num_groups}, weight{weight}, bias{bias}, eps{eps}
+  {}
+
+  GroupNormPlugin(const char *data, size_t length) : GroupNormPlugin(std::string(data, length)) {}
+
+  GroupNormPlugin(const std::string &data) {
+    deserializeFromString(data);
+  }
+
+  void deserializeFromString(const std::string &data) {
+    std::istringstream data_stream(data);
+    torch::serialize::InputArchive input_archive;
+    input_archive.load_from(data_stream);
+    {
+      torch::IValue value;
+      input_archive.read("num_groups", value);
+      num_groups = value.toInt();
+    }
+    {
+      torch::IValue value;
+      input_archive.read("weight", value);
+      weight = value.toTensor();
+    }
+    {
+      torch::IValue value;
+      input_archive.read("bias", value);
+      bias = value.toTensor();
+    }
+    {
+      torch::IValue value;
+      input_archive.read("eps", value);
+      eps = value.toDouble();
+    }
+    {
+      torch::IValue value;
+      input_archive.read("dtype", value);
+      dtype = (DataType) value.toInt();
+    }
+    {
+      torch::IValue value;
+      input_archive.read("input_sizes", value);
+#ifdef USE_DEPRECATED_INTLIST
+      input_sizes = value.toIntListRef().vec();
+#else
+      input_sizes = value.toIntVector();
+#endif
+    }
+    {
+      torch::IValue value;
+      input_archive.read("output_sizes", value);
+#ifdef USE_DEPRECATED_INTLIST
+      output_sizes = value.toIntListRef().vec();
+#else
+      output_sizes = value.toIntVector();
+#endif
+    }
+  }
+
+  std::string serializeToString() const {
+    torch::serialize::OutputArchive output_archive;
+    output_archive.write("num_groups", torch::IValue(num_groups));
+    output_archive.write("weight", torch::IValue(weight));
+    output_archive.write("bias", torch::IValue(bias));
+    output_archive.write("eps", torch::IValue(eps));
+    output_archive.write("dtype", torch::IValue((int) dtype));
+    output_archive.write("input_sizes", torch::IValue(input_sizes));
+    output_archive.write("output_sizes", torch::IValue(output_sizes));
+    std::ostringstream data_str;
+    output_archive.save_to(data_str);
+    return data_str.str();
+  }
+
+  const char* getPluginType() const override {
+    return "group_norm";
+  };
+
+  const char* getPluginVersion() const override {
+    return "1";
+  }
+
+  int getNbOutputs() const override {
+    return 1;
+  }
+
+  Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override {
+    Dims dims;
+    dims.nbDims = inputs->nbDims;
+
+    for (int i = 0; i < inputs->nbDims; i++) {
+      dims.d[i] = inputs->d[i];
+    }
+
+    return dims;
+  }
+
+  bool supportsFormat(DataType type, PluginFormat format) const override {
+    if (format != PluginFormat::kNCHW) {
+      return false;
+    }
+    if (type == DataType::kINT32 || type == DataType::kINT8) {
+      return false;
+    }
+    return true;
+  }
+
+  void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims,
+      int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) override {
+
+    // set data type
+    if (type == DataType::kFLOAT) {
+      tensor_options = tensor_options.dtype(c10::kFloat);
+      dtype = type;
+    } else if (type == DataType::kHALF) {
+      tensor_options = tensor_options.dtype(c10::kHalf);
+      dtype = type;
+    }
+
+    // set input sizes
+    input_sizes.resize(inputDims[0].nbDims);
+    for (int i = 0; i < inputDims[0].nbDims; i++) {
+      input_sizes[i] = inputDims[0].d[i];
+    }
+
+    // set output sizes
+    output_sizes.resize(outputDims[0].nbDims);
+    for (int i = 0; i < outputDims[0].nbDims; i++) {
+      output_sizes[i] = outputDims[0].d[i];
+    }
+  }
+
+  int initialize() override {
+    // set device
+    tensor_options = tensor_options.device(c10::kCUDA);
+
+    // set data type
+    if (dtype == DataType::kFLOAT) {
+      tensor_options = tensor_options.dtype(c10::kFloat);
+    } else if (dtype == DataType::kHALF) {
+      tensor_options = tensor_options.dtype(c10::kHalf);
+    }
+
+    weight = weight.to(tensor_options);
+    bias = bias.to(tensor_options);
+
+    return 0;
+  }
+
+  void terminate() override {}
+
+  size_t getWorkspaceSize(int maxBatchSize) const override { return 0; }
+
+  int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream) override {
+    // get input / output dimensions
+    std::vector<int64_t> batch_input_sizes = input_sizes;
+    std::vector<int64_t> batch_output_sizes = output_sizes;
+    batch_input_sizes.insert(batch_input_sizes.begin(), batchSize);
+    batch_output_sizes.insert(batch_output_sizes.begin(), batchSize);
+
+    // create tensor wrappers
+    at::Tensor input = at::from_blob((void*) inputs[0], batch_input_sizes, [](void*){}, tensor_options);
+    at::Tensor output = at::from_blob(outputs[0], batch_output_sizes, [](void*){}, tensor_options);
+
+    // create new torch cuda stream
+    at::cuda::CUDAStream torch_stream = at::cuda::getStreamFromPool();
+    at::cuda::CUDAStreamGuard torch_guard(torch_stream);
+
+    // capture current work on tensorrt cuda stream
+    cudaEvent_t event;
+    cudaEventCreate(&event);
+    cudaEventRecord(event, stream);
+
+    // make torch cuda stream wait on tensorrt work
+    cudaStreamWaitEvent(torch_stream.stream(), event, 0);
+
+    // enqueue work
+    // group_norm function from PyTorch:
+    // https://pytorch.org/cppdocs/api/function_namespaceat_1a6bc1e9504ea440c6c96ff8a8b94333f2.html#exhale-function-namespaceat-1a6bc1e9504ea440c6c96ff8a8b94333f2
+    at::Tensor output_tmp = at::group_norm(input, num_groups, weight, bias, eps);
+    output.copy_(output_tmp);
+
+    // capture event on enqueued stream
+    cudaEvent_t torch_event;
+    cudaEventCreate(&torch_event);
+    cudaEventRecord(torch_event, torch_stream.stream());
+    cudaStreamWaitEvent(stream, torch_event, 0);
+
+    cudaEventDestroy(event);
+    cudaEventDestroy(torch_event);
+
+    return 0;
+  }
+
+  size_t getSerializationSize() const override {
+    return serializeToString().size();
+  }
+
+  void serialize(void* buffer) const override {
+    std::string data = serializeToString();
+    size_t size = getSerializationSize();
+    data.copy((char *) buffer, size);
+  }
+
+  void destroy() override {}
+
+  IPluginV2* clone() const override {
+    return new GroupNormPlugin(num_groups, weight, bias, eps);
+  }
+
+  void setPluginNamespace(const char* pluginNamespace) override {}
+
+  const char *getPluginNamespace() const override {
+    return "torch2trt";
+  }
+
+};
+
+class GroupNormPluginCreator : public IPluginCreator {
+public:
+  GroupNormPluginCreator() {}
+
+  const char *getPluginNamespace() const override {
+    return "torch2trt";
+  }
+
+  const char *getPluginName() const override {
+    return "group_norm";
+  }
+
+  const char *getPluginVersion() const override {
+    return "1";
+  }
+
+  IPluginV2 *deserializePlugin(const char *name, const void *data, size_t length) override {
+    return new GroupNormPlugin((const char*) data, length);
+  }
+
+  void setPluginNamespace(const char *N) override {}
+  const PluginFieldCollection *getFieldNames() override { return nullptr; }
+
+  IPluginV2 *createPlugin(const char *name, const PluginFieldCollection *fc) override { return nullptr; }
+
+};
+
+REGISTER_TENSORRT_PLUGIN(GroupNormPluginCreator);
+
+} // namespace torch2trt
+
+
diff --git a/torch2trt/plugins/interpolate.cpp b/torch2trt/plugins/interpolate.cpp
index 9cd0bb83..cf463d0f 100644
--- a/torch2trt/plugins/interpolate.cpp
+++ b/torch2trt/plugins/interpolate.cpp
@@ -281,17 +281,4 @@ class InterpolatePluginCreator : public IPluginCreator {
 
 REGISTER_TENSORRT_PLUGIN(InterpolatePluginCreator);
 
-
-PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
-  py::class_<InterpolatePlugin>(m, "InterpolatePlugin")
-    .def(py::init<std::vector<int64_t>, std::string, bool>(), py::arg("size"), py::arg("mode"), py::arg("align_corners"))
-    .def(py::init<const std::string &>(), py::arg("data"))
-    .def("getSerializationSize", &InterpolatePlugin::getSerializationSize)
-    .def("deserializeFromString", &InterpolatePlugin::deserializeFromString)
-    .def("serializeToString", [](const InterpolatePlugin& plugin) {
-      std::string data = plugin.serializeToString();
-      return py::bytes(data);
-    });
-}
-
-} // namespace torch2trt
\ No newline at end of file
+} // namespace torch2trt
diff --git a/torch2trt/plugins/plugins.cpp b/torch2trt/plugins/plugins.cpp
new file mode 100644
index 00000000..f548949f
--- /dev/null
+++ b/torch2trt/plugins/plugins.cpp
@@ -0,0 +1,30 @@
+#include <torch/extension.h>
+#include "interpolate.cpp"
+#include "group_norm.cpp"
+
+
+using namespace nvinfer1;
+
+namespace torch2trt {
+  PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+    py::class_<InterpolatePlugin>(m, "InterpolatePlugin")
+      .def(py::init<std::vector<int64_t>, std::string, bool>(), py::arg("size"), py::arg("mode"), py::arg("align_corners"))
+      .def(py::init<const std::string &>(), py::arg("data"))
+      .def("getSerializationSize", &InterpolatePlugin::getSerializationSize)
+      .def("deserializeFromString", &InterpolatePlugin::deserializeFromString)
+      .def("serializeToString", [](const InterpolatePlugin& plugin) {
+        std::string data = plugin.serializeToString();
+        return py::bytes(data);
+      });
+    py::class_<GroupNormPlugin>(m, "GroupNormPlugin")
+      .def(py::init<int64_t, at::Tensor, at::Tensor, double>(), py::arg("num_groups"), py::arg("weight"), py::arg("bias"), py::arg("eps"))
+      .def(py::init<const std::string &>(), py::arg("data"))
+      .def("getSerializationSize", &GroupNormPlugin::getSerializationSize)
+      .def("deserializeFromString", &GroupNormPlugin::deserializeFromString)
+      .def("serializeToString", [](const GroupNormPlugin& plugin) {
+        std::string data = plugin.serializeToString();
+        return py::bytes(data);
+      });
+
+  }
+} // namespace torch2trt
From adccbf14c77ba73dd064653c0461739e3b2191ac Mon Sep 17 00:00:00 2001
From: John
Date: Tue, 17 Nov 2020 15:06:09 -0800
Subject: [PATCH 335/355] Koenvandesande remove duplicate filenames (#448)

* Remove duplicate filenames which do not work on Windows by merging files

* Fix

* relu tests

Co-authored-by: Koen van de Sande
---
 torch2trt/converters/Identity.py | 11 ---------
 torch2trt/converters/ReLU.py     | 11 ---------
 torch2trt/converters/ReLU6.py    | 23 -------------------
 torch2trt/converters/__init__.py |  3 ---
 torch2trt/converters/identity.py | 12 +++++++++-
 torch2trt/converters/relu.py     | 30 ++++++++++++++++++++++---
 torch2trt/converters/relu6.py    | 38 +++++++++++++++++++++++++++++---
 7 files changed, 73 insertions(+), 55 deletions(-)
 delete mode 100644 torch2trt/converters/Identity.py
 delete mode 100644 torch2trt/converters/ReLU.py
 delete mode 100644 torch2trt/converters/ReLU6.py

diff --git a/torch2trt/converters/Identity.py b/torch2trt/converters/Identity.py
deleted file mode 100644
index 1934666d..00000000
--- a/torch2trt/converters/Identity.py
+++ /dev/null
@@ -1,11 +0,0 @@
-from torch2trt.torch2trt import *
-
-
-@tensorrt_converter('torch.nn.Dropout.forward')
-@tensorrt_converter('torch.nn.Dropout2d.forward')
-@tensorrt_converter('torch.nn.Dropout3d.forward')
-def convert_Identity(ctx):
-    input = ctx.method_args[1]
-    input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
-    output = ctx.method_return
-    output._trt = input_trt
\ No newline at end of file
diff --git a/torch2trt/converters/ReLU.py b/torch2trt/converters/ReLU.py
deleted file mode 100644
index 481f4b8a..00000000
--- a/torch2trt/converters/ReLU.py
+++ /dev/null
@@ -1,11 +0,0 @@
-from torch2trt.torch2trt import *
-
-
-@tensorrt_converter('torch.nn.ReLU.forward')
-def convert_ReLU(ctx):
-    input = ctx.method_args[1]
-    input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
-    output = ctx.method_return
-    layer = ctx.network.add_activation(
-        input=input_trt, type=trt.ActivationType.RELU)
-    output._trt = layer.get_output(0)
\ No newline at end of file
diff --git a/torch2trt/converters/ReLU6.py b/torch2trt/converters/ReLU6.py
deleted file mode 100644
index c452693c..00000000
--- a/torch2trt/converters/ReLU6.py
+++ /dev/null
@@ -1,23 +0,0 @@
-from torch2trt.torch2trt import *
-from torch2trt.module_test import add_module_test
-
-
-@tensorrt_converter('torch.nn.ReLU6.forward')
-def convert_ReLU6(ctx):
-    input = ctx.method_args[1]
-    output = ctx.method_return
-
-    input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input, 6])
-    input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1)
-
-    layer = ctx.network.add_activation(
-        input=input_a_trt, type=trt.ActivationType.RELU)
-    layer = ctx.network.add_elementwise(
-        layer.get_output(0), input_b_trt, trt.ElementWiseOperation.MIN)
-
-    output._trt = layer.get_output(0)
-
-
-@add_module_test(torch.float32,
torch.device('cuda'), [(1, 3, 4, 5)]) -def test_relu6_basic(): - return torch.nn.ReLU6() diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index 663710e0..08164476 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -12,11 +12,8 @@ from .Conv2d import * from .ConvTranspose import * from .ConvTranspose2d import * -from .Identity import * from .Linear import * from .LogSoftmax import * -from .ReLU import * -from .ReLU6 import * from .activation import * from .adaptive_avg_pool2d import * from .adaptive_max_pool2d import * diff --git a/torch2trt/converters/identity.py b/torch2trt/converters/identity.py index bac1bd99..f7ef1f97 100644 --- a/torch2trt/converters/identity.py +++ b/torch2trt/converters/identity.py @@ -5,8 +5,18 @@ @tensorrt_converter('torch.nn.functional.dropout') @tensorrt_converter('torch.nn.functional.dropout2d') @tensorrt_converter('torch.nn.functional.dropout3d') -def convert_identity(ctx): +def convert_functional_identity(ctx): input = ctx.method_args[0] input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return output._trt = input_trt + + +@tensorrt_converter('torch.nn.Dropout.forward') +@tensorrt_converter('torch.nn.Dropout2d.forward') +@tensorrt_converter('torch.nn.Dropout3d.forward') +def convert_identity(ctx): + input = ctx.method_args[1] + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] + output = ctx.method_return + output._trt = input_trt diff --git a/torch2trt/converters/relu.py b/torch2trt/converters/relu.py index 37f71167..c58405c1 100644 --- a/torch2trt/converters/relu.py +++ b/torch2trt/converters/relu.py @@ -1,11 +1,35 @@ from torch2trt.torch2trt import * -from .ReLU import * +from torch2trt.module_test import add_module_test @tensorrt_converter('torch.relu') @tensorrt_converter('torch.relu_') @tensorrt_converter('torch.nn.functional.relu') @tensorrt_converter('torch.nn.functional.relu_') -def convert_relu(ctx): +def convert_functional_relu(ctx): ctx.method_args = (torch.nn.ReLU(),) + ctx.method_args - convert_ReLU(ctx) \ No newline at end of file + convert_relu(ctx) + + +@tensorrt_converter('torch.nn.ReLU.forward') +def convert_relu(ctx): + input = ctx.method_args[1] + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] + output = ctx.method_return + layer = ctx.network.add_activation( + input=input_trt, type=trt.ActivationType.RELU) + output._trt = layer.get_output(0) + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5)]) +def test_relu_basic(): + return torch.nn.ReLU() + + +class FunctionalRelu(torch.nn.Module): + def forward(self, x): + return torch.nn.functional.relu(x) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5)]) +def test_functional_relu_basic(): + return FunctionalRelu() \ No newline at end of file diff --git a/torch2trt/converters/relu6.py b/torch2trt/converters/relu6.py index fc4e6ec0..0d809aa4 100644 --- a/torch2trt/converters/relu6.py +++ b/torch2trt/converters/relu6.py @@ -1,8 +1,40 @@ from torch2trt.torch2trt import * -from .ReLU6 import * +from torch2trt.module_test import add_module_test @tensorrt_converter('torch.nn.functional.relu6') -def convert_relu6(ctx): +def convert_functional_relu6(ctx): ctx.method_args = (torch.nn.ReLU6(),) + ctx.method_args - convert_ReLU6(ctx) \ No newline at end of file + convert_relu6(ctx) + + +@tensorrt_converter('torch.nn.ReLU6.forward') +def convert_relu6(ctx): + input = ctx.method_args[1] + output = ctx.method_return + + input_a_trt, input_b_trt = 
add_missing_trt_tensors(ctx.network, [input, 6]) + input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1) + + layer = ctx.network.add_activation( + input=input_a_trt, type=trt.ActivationType.RELU) + layer = ctx.network.add_elementwise( + layer.get_output(0), input_b_trt, trt.ElementWiseOperation.MIN) + + output._trt = layer.get_output(0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5)]) +def test_relu6_basic(): + return torch.nn.ReLU6() + + +class FunctionalRelu6(torch.nn.Module): + def forward(self, x): + return torch.nn.functional.relu6(x) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5)]) +def test_functional_relu6_basic(): + return FunctionalRelu6() + From 81024ccd7c90412fa8320eb991f3454f0be3d7a0 Mon Sep 17 00:00:00 2001 From: John Date: Tue, 1 Dec 2020 16:19:00 -0800 Subject: [PATCH 336/355] allow direct method setting to support custom layers (#460) --- torch2trt/torch2trt.py | 68 ++++++++++++++++++++++++++++-------------- 1 file changed, 46 insertions(+), 22 deletions(-) diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index 7051e629..f5c9326f 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -1,9 +1,10 @@ import torch import tensorrt as trt -from copy import copy +import copy import numpy as np import io from collections import defaultdict +import importlib from .calibration import ( TensorBatchDataset, @@ -297,30 +298,24 @@ def wrapper(*args, **kwargs): class ConversionHook(object): """Attaches TensorRT converter to PyTorch method call""" - def __init__(self, ctx, method, converter): + def __init__(self, ctx, key, converter): self.ctx = ctx - self.method_str = method + self.key = key self.converter = converter def _set_method(self, method): - exec("%s = method" % self.method_str) + module = self.converter['module'] + exec('module.%s = method' % self.converter['qual_name']) def __enter__(self): - try: - self.method_impl = eval(self.method_str) - except AttributeError: - self.method_impl = None - - if self.method_impl: - self._set_method( - attach_converter( - self.ctx, self.method_impl, self.converter, self.method_str - ) + self._set_method( + attach_converter( + self.ctx, self.converter['method_impl'], self.converter, self.converter['method_str'] ) + ) def __exit__(self, type, val, tb): - if self.method_impl: - self._set_method(self.method_impl) + self._set_method(self.converter['method_impl']) def default_input_names(num_inputs): return ["input_%d" % i for i in range(num_inputs)] @@ -369,8 +364,8 @@ def __init__(self, network, converters=CONVERTERS): self.method_kwargs = None self.method_return = None self.hooks = [ - ConversionHook(self, method, converter) - for method, converter in converters.items() + ConversionHook(self, key, converter) + for key, converter in converters.items() ] def __enter__(self): @@ -569,11 +564,40 @@ def torch2trt(module, # DEFINE ALL CONVERSION FUNCTIONS +def get_module_qualname(name): + s = name.split('.') + + for i in range(len(s)): + idx = len(s) - i - 1 + modulename, qualname = ".".join(s[:idx]), ".".join(s[idx:]) + try: + module = importlib.import_module(modulename) + return module, modulename, qualname + except: + pass + + raise RuntimeError("Could not import module") + -def tensorrt_converter(method, is_real=True, enabled=True): - +def tensorrt_converter(method, is_real=True, enabled=True, imports=[]): + + if isinstance(method, str): + module, module_name, qual_name = get_module_qualname(method) + else: + 
module, module_name, qual_name = importlib.import_module(method.__module__), method.__module__, method.__qualname__ + + method_impl = eval('copy.deepcopy(module.%s)' % qual_name) + def register_converter(converter): - CONVERTERS[method] = {"converter": converter, "is_real": is_real} + CONVERTERS[method] = { + "converter": converter, + "is_real": is_real, + "module": module, + "module_name": module_name, + "qual_name": qual_name, + "method_str": module_name + '.' + qual_name, + "method_impl": method_impl + } return converter def pass_converter(converter): @@ -584,4 +608,4 @@ def pass_converter(converter): else: return pass_converter - return register_converter + return register_converter \ No newline at end of file From 2b1827eeb153761e773b0770b56364cda564a362 Mon Sep 17 00:00:00 2001 From: John Date: Tue, 1 Dec 2020 16:26:23 -0800 Subject: [PATCH 337/355] custom converter to changelog (#462) --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1449ed5c..6e672c02 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### Added +- Added support for custom converters for methods defined outside of ``torch`` module - Added names for TensorRT layers - Added GroupNorm plugin which internally uses PyTorch aten::group_norm - Replaced Tensor.ndim references with len(tensor.shape) to support older pytorch versions From 033df0c94d311e72e8c466a0736cd3ebd5fd7584 Mon Sep 17 00:00:00 2001 From: John Date: Wed, 13 Jan 2021 14:06:52 -0800 Subject: [PATCH 338/355] added expand converter (#487) --- CHANGELOG.md | 1 + torch2trt/converters/__init__.py | 1 + torch2trt/converters/expand.py | 43 ++++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+) create mode 100644 torch2trt/converters/expand.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e672c02..df0fb9ff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### Added +- Added converter for ``torch.Tensor.expand`` - Added support for custom converters for methods defined outside of ``torch`` module - Added names for TensorRT layers - Added GroupNorm plugin which internally uses PyTorch aten::group_norm diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index 08164476..d6ae6876 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -25,6 +25,7 @@ from .clamp import * from .compare import * from .div import * +from .expand import * from .getitem import * from .identity import * from .instance_norm import * diff --git a/torch2trt/converters/expand.py b/torch2trt/converters/expand.py new file mode 100644 index 00000000..e0d07540 --- /dev/null +++ b/torch2trt/converters/expand.py @@ -0,0 +1,43 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +@tensorrt_converter('torch.Tensor.expand') +def convert_expand(ctx): + input = ctx.method_args[0] + sizes = ctx.method_args[1:] + output = ctx.method_return + + inshape = tuple(input.shape)[1:] # exclude batch + shape = tuple(output.shape)[1:] + ndim = len(shape) + start = tuple([0]*ndim) + stride = tuple([int(i == o) for i, o in zip(inshape, shape)]) # stride == 1 if dimensions match, 0 otherwise + + layer = ctx.network.add_slice(input._trt, start, shape, stride) + + output._trt = layer.get_output(0) + + +class ExpandModule(torch.nn.Module): + def __init__(self, *sizes): + super(ExpandModule, self).__init__() + self.sizes = sizes + + def forward(self, x): + return x.expand(*self.sizes) + + +@add_module_test(torch.float32, torch.device('cuda'), 
[(1,1,3,3)]) +def test_tensor_expand_singledim(): + return ExpandModule(1, 3, 3, 3) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1,1,1,3)]) +def test_tensor_expand_multidim(): + return ExpandModule(1, 3, 3, 3) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1,1,1,3)]) +def test_tensor_expand_inferdim(): + return ExpandModule(1, 3, -1, -1) \ No newline at end of file From 3e4ed64db1cf4c0ce5f33fad4841661956509fc4 Mon Sep 17 00:00:00 2001 From: meremeev Date: Mon, 22 Feb 2021 11:26:56 -0800 Subject: [PATCH 339/355] Additional converters for floordiv, mod, ne, and torch::tensor() operations (#505) * Initioal version of ne, floordiv, mod and tensor converters. Extend ops for relu and sigmoid. * Converters for floordiv, mod, ne, and torch::tensor() operations . Extend relu and sigmoid converters to Tensor methods. * Update CHANGELOG.md --- CHANGELOG.md | 3 + torch2trt/converters/__init__.py | 4 ++ torch2trt/converters/floordiv.py | 81 ++++++++++++++++++++++++++ torch2trt/converters/mod.py | 99 ++++++++++++++++++++++++++++++++ torch2trt/converters/ne.py | 54 +++++++++++++++++ torch2trt/converters/relu.py | 16 +++++- torch2trt/converters/sigmoid.py | 16 +++++- torch2trt/converters/tensor.py | 22 +++++++ 8 files changed, 293 insertions(+), 2 deletions(-) create mode 100644 torch2trt/converters/floordiv.py create mode 100644 torch2trt/converters/mod.py create mode 100644 torch2trt/converters/ne.py create mode 100644 torch2trt/converters/tensor.py diff --git a/CHANGELOG.md b/CHANGELOG.md index df0fb9ff..c38b0139 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,3 +10,6 @@ - Added GroupNorm plugin which internally uses PyTorch aten::group_norm - Replaced Tensor.ndim references with len(tensor.shape) to support older pytorch versions - Added reduced precision documentation page +- Added converters for ``floordiv``, ``mod``, ``ne``, and ``torch.tensor`` operations +- Extended ``relu`` converter to support ``Tensor.relu`` operation +- Extended ``sigmoid`` converter to support ``Tensor.sigmoid`` operation diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index d6ae6876..85ca6cf9 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -26,6 +26,7 @@ from .compare import * from .div import * from .expand import * +from .floordiv import * from .getitem import * from .identity import * from .instance_norm import * @@ -35,8 +36,10 @@ from .max_pool2d import * from .mean import * from .min import * +from .mod import * from .mul import * from .normalize import * +from .ne import * from .narrow import * from .pad import * from .permute import * @@ -52,6 +55,7 @@ from .sub import * from .sum import * from .tanh import * +from .tensor import * from .transpose import * from .unary import * from .view import * diff --git a/torch2trt/converters/floordiv.py b/torch2trt/converters/floordiv.py new file mode 100644 index 00000000..9864ca4e --- /dev/null +++ b/torch2trt/converters/floordiv.py @@ -0,0 +1,81 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +@tensorrt_converter('torch.Tensor.__floordiv__') +@tensorrt_converter('torch.Tensor.__ifloordiv__') +@tensorrt_converter('torch.floor_divide') +def convert_floordiv(ctx): + input_a = ctx.method_args[0] + input_b = ctx.method_args[1] + output = ctx.method_return + input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) + input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], 
len(output.shape) - 1)
+    # we cannot use ElementWiseOperation.FLOOR_DIV directly because Torch truncates negative results toward 0,
+    # but TensorRT's FLOOR_DIV op rounds toward -Inf
+    # sign = ab / |ab|
+    # floordiv result: sign * (|a| // |b|)
+    ab_layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.PROD)
+    abs_ab_layer = ctx.network.add_unary(ab_layer.get_output(0), trt.UnaryOperation.ABS)
+    sign_layer = ctx.network.add_elementwise(ab_layer.get_output(0), abs_ab_layer.get_output(0),
+                                             trt.ElementWiseOperation.DIV)
+    abs_a_layer = ctx.network.add_unary(input_a_trt, trt.UnaryOperation.ABS)
+    abs_b_layer = ctx.network.add_unary(input_b_trt, trt.UnaryOperation.ABS)
+    abs_floor_layer = ctx.network.add_elementwise(abs_a_layer.get_output(0), abs_b_layer.get_output(0),
+                                                  trt.ElementWiseOperation.FLOOR_DIV)
+    out_layer = ctx.network.add_elementwise(sign_layer.get_output(0), abs_floor_layer.get_output(0),
+                                            trt.ElementWiseOperation.PROD)
+    output._trt = out_layer.get_output(0)
+
+
+class FloorDiv(torch.nn.Module):
+    def __init__(self):
+        super(FloorDiv, self).__init__()
+
+    def forward(self, x, y):
+        return x // y
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20), (1, 3, 1, 20)])
+def test_floordiv_op():
+    return FloorDiv()
+
+
+class FloorDivAssign(torch.nn.Module):
+    def __init__(self):
+        super(FloorDivAssign, self).__init__()
+
+    def forward(self, x, y):
+        x //= y
+        return x
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20), (1, 3, 1, 20)])
+def test_floordiv_op_assign():
+    return FloorDivAssign()
+
+
+class FloorDivConst(torch.nn.Module):
+    def __init__(self):
+        super(FloorDivConst, self).__init__()
+
+    def forward(self, x):
+        return x // 2.
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20)])
+def test_floordiv_op_const():
+    return FloorDivConst()
+
+
+class TorchFloorDiv(torch.nn.Module):
+    def __init__(self):
+        super(TorchFloorDiv, self).__init__()
+
+    def forward(self, x, y):
+        return torch.floor_divide(x, y)
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20), (1, 3, 1, 20)])
+def test_floordiv_func():
+    return TorchFloorDiv()
diff --git a/torch2trt/converters/mod.py b/torch2trt/converters/mod.py
new file mode 100644
index 00000000..6cf69435
--- /dev/null
+++ b/torch2trt/converters/mod.py
@@ -0,0 +1,99 @@
+from torch2trt.torch2trt import *
+from torch2trt.module_test import add_module_test
+
+
+@tensorrt_converter('torch.fmod')
+def convert_fmod(ctx):
+    input_a = ctx.method_args[0]
+    input_b = ctx.method_args[1]
+    output = ctx.method_return
+    input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b])
+    input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1)
+    # we cannot use ElementWiseOperation.FLOOR_DIV directly because Torch truncates negative results toward 0,
+    # but TensorRT's FLOOR_DIV op rounds toward -Inf
+    # sign = ab / |ab|
+    # floordiv result: sign * (|a| // |b|)
+    ab_layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.PROD)
+    abs_ab_layer = ctx.network.add_unary(ab_layer.get_output(0), trt.UnaryOperation.ABS)
+    sign_layer = ctx.network.add_elementwise(ab_layer.get_output(0), abs_ab_layer.get_output(0),
+                                             trt.ElementWiseOperation.DIV)
+    abs_a_layer = ctx.network.add_unary(input_a_trt, trt.UnaryOperation.ABS)
+    abs_b_layer = ctx.network.add_unary(input_b_trt, trt.UnaryOperation.ABS)
+    abs_floor_layer = ctx.network.add_elementwise(abs_a_layer.get_output(0), abs_b_layer.get_output(0),
+                                                  trt.ElementWiseOperation.FLOOR_DIV)
+    # a % b = a - (a // b) * b
+    floordiv_layer = ctx.network.add_elementwise(sign_layer.get_output(0), abs_floor_layer.get_output(0),
+                                                 trt.ElementWiseOperation.PROD)
+    prod_layer = ctx.network.add_elementwise(floordiv_layer.get_output(0), input_b_trt, trt.ElementWiseOperation.PROD)
+    sub_layer = ctx.network.add_elementwise(input_a_trt, prod_layer.get_output(0), trt.ElementWiseOperation.SUB)
+    output._trt = sub_layer.get_output(0)
+
+
+@tensorrt_converter('torch.Tensor.__mod__')
+# we need a separate converter for the % operator because Torch rounds toward -Inf for this op,
+# unlike torch.fmod; bug is filed: https://github.com/pytorch/pytorch/issues/52425
+# but for now we have to convert the model exactly
+def convert_mod(ctx):
+    input_a = ctx.method_args[0]
+    input_b = ctx.method_args[1]
+    output = ctx.method_return
+    input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b])
+    input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1)
+    # a % b = a - (a // b) * b
+    floordiv_layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.FLOOR_DIV)
+    prod_layer = ctx.network.add_elementwise(floordiv_layer.get_output(0), input_b_trt, trt.ElementWiseOperation.PROD)
+    mod_layer = ctx.network.add_elementwise(input_a_trt, prod_layer.get_output(0), trt.ElementWiseOperation.SUB)
+    output._trt = mod_layer.get_output(0)
+
+
+class Mod(torch.nn.Module):
+    def __init__(self):
+        super(Mod, self).__init__()
+
+    def forward(self, x, y):
+        return x % y
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20), (1, 3, 1, 20)])
+def test_mod_op():
+    return Mod()
+
+
+class ModAssign(torch.nn.Module):
+    def __init__(self):
+        super(ModAssign, self).__init__()
+
+    def forward(self, x, y):
+        x %= y
+        return x
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20), (1, 3, 1, 20)])
+def test_mod_op_assign():
+    return ModAssign()
+
+
+class ModConst(torch.nn.Module):
+    def __init__(self):
+        super(ModConst, self).__init__()
+
+    def forward(self, x):
+        return x % 2.
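The sign correction in the converters above exists because, at the time of this patch, ``torch.fmod`` and ``torch.floor_divide`` truncate toward zero, while ``%`` (``Tensor.__mod__``) and TensorRT's ``FLOOR_DIV`` round toward negative infinity; the two conventions only disagree for negative operands. A minimal PyTorch-only illustration of the difference (not part of the patch itself):

```python
import torch

a, b = torch.tensor(-7.0), torch.tensor(2.0)

print(torch.fmod(a, b))  # tensor(-1.): C-style remainder, -7 = -3 * 2 - 1 (quotient truncated toward zero)
print(a % b)             # tensor(1.):  Python-style modulo, -7 = -4 * 2 + 1 (quotient floored toward -inf)

# TensorRT's FLOOR_DIV computes -7 // 2 = -4 (floor), so the converters
# rebuild truncation as sign(a * b) * (|a| // |b|) = -1 * (7 // 2) = -3.
```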
+ + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20)]) +def test_mod_op_const(): + return ModConst() + + +class TorchMod(torch.nn.Module): + def __init__(self): + super(TorchMod, self).__init__() + + def forward(self, x, y): + return torch.fmod(x, y) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20), (1, 3, 40, 20)]) +def test_mod_func(): + return TorchMod() diff --git a/torch2trt/converters/ne.py b/torch2trt/converters/ne.py new file mode 100644 index 00000000..c28f16e7 --- /dev/null +++ b/torch2trt/converters/ne.py @@ -0,0 +1,54 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +@tensorrt_converter('torch.ne') +@tensorrt_converter('torch.Tensor.__ne__') +def convert_ne(ctx): + input_a = ctx.method_args[0] + input_b = ctx.method_args[1] + output = ctx.method_return + input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b]) + input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1) + layer_1 = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.EQUAL) + layer_2 = ctx.network.add_unary(layer_1.get_output(0), trt.UnaryOperation.NOT) + output._trt = layer_2.get_output(0) + + +class NotEqual(torch.nn.Module): + def __init__(self): + super(NotEqual, self).__init__() + + def forward(self, x, y): + return x != y + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20), (1, 3, 1, 20)]) +def test_ne_op(): + return NotEqual() + + +class NotEqualConst(torch.nn.Module): + def __init__(self): + super(NotEqualConst, self).__init__() + + def forward(self, x): + return x != 13.62 + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20)]) +def test_ne_op_const(): + return NotEqualConst() + + +class TorchNotEqual(torch.nn.Module): + def __init__(self): + super(TorchNotEqual, self).__init__() + + def forward(self, x, y): + return torch.ne(x, y) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20), (1, 3, 1, 20)]) +def test_ne_torch(): + return TorchNotEqual() diff --git a/torch2trt/converters/relu.py b/torch2trt/converters/relu.py index c58405c1..986c1f30 100644 --- a/torch2trt/converters/relu.py +++ b/torch2trt/converters/relu.py @@ -6,6 +6,7 @@ @tensorrt_converter('torch.relu_') @tensorrt_converter('torch.nn.functional.relu') @tensorrt_converter('torch.nn.functional.relu_') +@tensorrt_converter('torch.Tensor.relu') def convert_functional_relu(ctx): ctx.method_args = (torch.nn.ReLU(),) + ctx.method_args convert_relu(ctx) @@ -32,4 +33,17 @@ def forward(self, x): @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5)]) def test_functional_relu_basic(): - return FunctionalRelu() \ No newline at end of file + return FunctionalRelu() + + +class TensorRelu(torch.nn.Module): + def __init__(self): + super(TensorRelu, self).__init__() + + def forward(self, x): + return x.relu() + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20)]) +def test_tensor_relu(): + return TensorRelu() diff --git a/torch2trt/converters/sigmoid.py b/torch2trt/converters/sigmoid.py index 795ce415..59639475 100644 --- a/torch2trt/converters/sigmoid.py +++ b/torch2trt/converters/sigmoid.py @@ -4,6 +4,7 @@ @tensorrt_converter('torch.nn.functional.sigmoid') @tensorrt_converter('torch.sigmoid') +@tensorrt_converter('torch.Tensor.sigmoid') def convert_sigmoid(ctx): input = ctx.method_args[0] input_trt = add_missing_trt_tensors(ctx.network, [input])[0] @@ -15,4 
+16,17 @@ def convert_sigmoid(ctx):
 
 @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
 def test_sigmoid_basic():
-    return torch.nn.Sigmoid()
\ No newline at end of file
+    return torch.nn.Sigmoid()
+
+
+class TensorSigmoid(torch.nn.Module):
+    def __init__(self):
+        super(TensorSigmoid, self).__init__()
+
+    def forward(self, x):
+        return x.sigmoid()
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 40, 20)])
+def test_tensor_sigmoid():
+    return TensorSigmoid()
diff --git a/torch2trt/converters/tensor.py b/torch2trt/converters/tensor.py
new file mode 100644
index 00000000..15406ac7
--- /dev/null
+++ b/torch2trt/converters/tensor.py
@@ -0,0 +1,22 @@
+from torch2trt.torch2trt import *
+from torch2trt.module_test import add_module_test
+
+
+@tensorrt_converter('torch.tensor')
+def convert_tensor(ctx):
+    output = ctx.method_return
+    layer = ctx.network.add_constant(tuple(output.shape), output.detach().cpu().numpy())
+    output._trt = layer.get_output(0)
+
+
+class TorchTensor(torch.nn.Module):
+    def __init__(self):
+        super(TorchTensor, self).__init__()
+
+    def forward(self, x):
+        return x + torch.tensor([[1., 2., 3.], [4., 5., 6.]], device=torch.device('cuda'))
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 2, 3)])
+def test_tensor_creation():
+    return TorchTensor()
From a5bdd2975807e7a4b5ce595229e51f81f6c5a631 Mon Sep 17 00:00:00 2001
From: John
Date: Wed, 24 Feb 2021 16:21:53 -0800
Subject: [PATCH 340/355] added passing of torch2trt_kwargs to conversion context (#482)

* added passing of torch2trt_kwargs to conversion context

* added passing of torch2trt_kwargs to conversion context
---
 torch2trt/torch2trt.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py
index f5c9326f..0fe35d5c 100644
--- a/torch2trt/torch2trt.py
+++ b/torch2trt/torch2trt.py
@@ -357,12 +357,14 @@ def wrapper(*args, **kwargs):
 
 
 class ConversionContext(object):
-    def __init__(self, network, converters=CONVERTERS):
+
+    def __init__(self, network, converters=CONVERTERS, torch2trt_kwargs=None):
         self.network = LayerNamingNetworkWrapper(self, network)
         self.lock = False
         self.method_args = None
         self.method_kwargs = None
         self.method_return = None
+        self.torch2trt_kwargs = torch2trt_kwargs
         self.hooks = [
             ConversionHook(self, key, converter)
             for key, converter in converters.items()
@@ -487,7 +489,12 @@ def torch2trt(module,
               int8_calib_dataset=None,
               int8_calib_algorithm=DEFAULT_CALIBRATION_ALGORITHM,
               int8_calib_batch_size=1,
-              use_onnx=False):
+              use_onnx=False,
+              **kwargs):
+
+    # capture arguments to provide to context
+    kwargs.update(locals())
+    kwargs.pop('kwargs')
 
     inputs_in = inputs
 
@@ -524,7 +531,7 @@ def torch2trt(module,
     else:
         network = builder.create_network()
 
-    with ConversionContext(network) as ctx:
+    with ConversionContext(network, torch2trt_kwargs=kwargs) as ctx:
 
         ctx.add_inputs(inputs, input_names)
 
From e55a7a5123912d9868afb43ca2efcb7b068862b9 Mon Sep 17 00:00:00 2001
From: John
Date: Thu, 25 Feb 2021 02:14:19 -0800
Subject: [PATCH 341/355] added filter to floordiv to only enable for pytorch 1.6+ (#511)

* added filter to floordiv to only enable for pytorch 1.6+

* enabled soft failure for missing torch method
---
 torch2trt/torch2trt.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py
index 0fe35d5c..cc628ede 100644
--- a/torch2trt/torch2trt.py
+++ b/torch2trt/torch2trt.py
@@ -19,6 +19,10 @@ def trt_version(): return
trt.__version__ +def torch_version(): + return torch.__version__ + + def torch_dtype_to_trt(dtype): if trt_version() >= '7.0' and dtype == torch.bool: return trt.bool @@ -593,7 +597,10 @@ def tensorrt_converter(method, is_real=True, enabled=True, imports=[]): else: module, module_name, qual_name = importlib.import_module(method.__module__), method.__module__, method.__qualname__ - method_impl = eval('copy.deepcopy(module.%s)' % qual_name) + try: + method_impl = eval('copy.deepcopy(module.%s)' % qual_name) + except: + enabled = False def register_converter(converter): CONVERTERS[method] = { @@ -615,4 +622,4 @@ def pass_converter(converter): else: return pass_converter - return register_converter \ No newline at end of file + return register_converter From 15da623b216d419ecea2a24076872693fca94ec8 Mon Sep 17 00:00:00 2001 From: John Date: Tue, 2 Mar 2021 12:42:03 -0800 Subject: [PATCH 342/355] increment version to 0.2.0 (#517) --- CHANGELOG.md | 2 ++ setup.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c38b0139..ee318b92 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ## [Master] +## [0.2.0] - 03/02/2021 + ### Added - Added converter for ``torch.Tensor.expand`` diff --git a/setup.py b/setup.py index 68879132..77d25901 100644 --- a/setup.py +++ b/setup.py @@ -37,7 +37,7 @@ def trt_lib_dir(): setup( name='torch2trt', - version='0.1.0', + version='0.2.0', description='An easy to use PyTorch to TensorRT converter', packages=find_packages(), ext_package='torch2trt', From 44977a94cb087fe521421802e9df12a5ac3ceb3f Mon Sep 17 00:00:00 2001 From: John Date: Tue, 2 Mar 2021 13:02:22 -0800 Subject: [PATCH 343/355] Release push docs tagfix (#519) * increment version to 0.2.0 * realse push docs tagfix --- scripts/release_push_docs.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/release_push_docs.sh b/scripts/release_push_docs.sh index 9d5fc870..da008c2c 100755 --- a/scripts/release_push_docs.sh +++ b/scripts/release_push_docs.sh @@ -2,6 +2,6 @@ TAG=$1 -python3 scripts/dump_converters.py > docs/converters.md +python3 scripts/dump_converters.py --tag=$TAG > docs/converters.md mike deploy $TAG --push From 8100c6a5b17e80fd6644d680516ac06ab43052ca Mon Sep 17 00:00:00 2001 From: John Date: Sat, 20 Mar 2021 16:24:11 -0700 Subject: [PATCH 344/355] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 7f73c56a..b770bd88 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # torch2trt +> What models are you using, or hoping to use, with TensorRT? Feel free to join the discussion [here](https://github.com/NVIDIA-AI-IOT/torch2trt/discussions/531). + torch2trt is a PyTorch to TensorRT converter which utilizes the From 72c81f9189e9ab99e0f58df307e266151151d119 Mon Sep 17 00:00:00 2001 From: John Date: Sat, 20 Mar 2021 16:36:28 -0700 Subject: [PATCH 345/355] Create CLA.md --- CLA.md | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 CLA.md diff --git a/CLA.md b/CLA.md new file mode 100644 index 00000000..389a4746 --- /dev/null +++ b/CLA.md @@ -0,0 +1,58 @@ +## Individual Contributor License Agreement (CLA) + +**Thank you for submitting your contributions to this project.** + +By signing this CLA, you agree that the following terms apply to all of your past, present and future contributions +to the project. + +### License. 
+
+You hereby represent that all present, past and future contributions are governed by the
+[MIT License](https://opensource.org/licenses/MIT)
+copyright statement.
+
+This entails that to the extent possible under law, you transfer all copyright and related or neighboring rights
+of the code or documents you contribute to the project itself or its maintainers.
+Furthermore you also represent that you have the authority to perform the above waiver
+with respect to the entirety of your contributions.
+
+### Moral Rights.
+
+To the fullest extent permitted under applicable law, you hereby waive, and agree not to
+assert, all of your “moral rights” in or relating to your contributions for the benefit of the project.
+
+### Third Party Content.
+
+If your Contribution includes or is based on any source code, object code, bug fixes, configuration changes, tools,
+specifications, documentation, data, materials, feedback, information or other works of authorship that were not
+authored by you (“Third Party Content”) or if you are aware of any third party intellectual property or proprietary
+rights associated with your Contribution (“Third Party Rights”),
+then you agree to include with the submission of your Contribution full details respecting such Third Party
+Content and Third Party Rights, including, without limitation, identification of which aspects of your
+Contribution contain Third Party Content or are associated with Third Party Rights, the owner/author of the
+Third Party Content and Third Party Rights, where you obtained the Third Party Content, and any applicable
+third party license terms or restrictions respecting the Third Party Content and Third Party Rights. For greater
+certainty, the foregoing obligations respecting the identification of Third Party Content and Third Party Rights
+do not apply to any portion of a Project that is incorporated into your Contribution to that same Project.
+
+### Representations.
+
+You represent that, other than the Third Party Content and Third Party Rights identified by
+you in accordance with this Agreement, you are the sole author of your Contributions and are legally entitled
+to grant the foregoing licenses and waivers in respect of your Contributions. If your Contributions were
+created in the course of your employment with your past or present employer(s), you represent that such
+employer(s) has authorized you to make your Contributions on behalf of such employer(s) or such employer(s)
+has waived all of their right, title or interest in or to your Contributions.
+
+### Disclaimer.
+
+To the fullest extent permitted under applicable law, your Contributions are provided on an "as is"
+basis, without any warranties or conditions, express or implied, including, without limitation, any implied
+warranties or conditions of non-infringement, merchantability or fitness for a particular purpose. You are not
+required to provide support for your Contributions, except to the extent you desire to provide support.
+
+### No Obligation.
+
+You acknowledge that the maintainers of this project are under no obligation to use or incorporate your contributions
+into the project. The decision to use or incorporate your contributions into the project will be made at the
+sole discretion of the maintainers or their authorized delegates.
From b9d9e73ca5f510e9be99ac2a9363f7b29cf03233 Mon Sep 17 00:00:00 2001 From: John Date: Thu, 25 Mar 2021 13:25:32 -0700 Subject: [PATCH 346/355] Functional conv (#535) * added conv_functional * add Tensor flatten * update changelog for functional conv / flatten * add site to gitignore --- .gitignore | 1 + CHANGELOG.md | 3 + torch2trt/converters/__init__.py | 1 + torch2trt/converters/conv_functional.py | 127 ++++++++++++++++++++++++ torch2trt/converters/view.py | 1 + 5 files changed, 133 insertions(+) create mode 100644 torch2trt/converters/conv_functional.py diff --git a/.gitignore b/.gitignore index 5b2ee2e5..cd56c03b 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,4 @@ __pycache__/ *.ipynb_checkpoints *.pth docs/converters.md +site \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index ee318b92..1e4af331 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,9 @@ ## [0.2.0] - 03/02/2021 +- Added converter for ``torch.Tensor.flatten`` +- Added converter for ``torch.nn.functional.conv2d`` and ``torch.nn.functional.conv3d`` + ### Added - Added converter for ``torch.Tensor.expand`` diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index 85ca6cf9..c1ab931b 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -7,6 +7,7 @@ from .AdaptiveAvgPool2d import * from .BatchNorm1d import * from .BatchNorm2d import * +from .conv_functional import * from .Conv import * from .Conv1d import * from .Conv2d import * diff --git a/torch2trt/converters/conv_functional.py b/torch2trt/converters/conv_functional.py new file mode 100644 index 00000000..e9cc42d4 --- /dev/null +++ b/torch2trt/converters/conv_functional.py @@ -0,0 +1,127 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +@tensorrt_converter('torch.nn.functional.conv2d', enabled=trt_version() >= '7.0') +@tensorrt_converter('torch.nn.functional.conv3d', enabled=trt_version() >= '7.0') +def convert_Conv_trt7_functional(ctx): + input = get_arg(ctx, 'input', pos=0, default=None) + weight = get_arg(ctx, 'weight', pos=1, default=None) + bias = get_arg(ctx, 'bias', pos=2, default=None) + stride = get_arg(ctx, 'stride', pos=3, default=1) + padding = get_arg(ctx, 'padding', pos=4, default=0) + dilation = get_arg(ctx, 'dilation', pos=5, default=1) + groups = get_arg(ctx, 'groups', pos=6, default=1) + + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] + output = ctx.method_return + + input_dim = input.dim() - 2 + + out_channels = int(weight.shape[0]) + kernel_size = tuple(weight.shape[2:]) + if not isinstance(kernel_size, tuple): + kernel_size = (kernel_size, ) * input_dim + + if not isinstance(stride, tuple): + stride = (stride, ) * input_dim + + if not isinstance(padding, tuple): + padding = (padding, ) * input_dim + + if not isinstance(dilation, tuple): + dilation = (dilation, ) * input_dim + + kernel = weight.detach().cpu().numpy() + + if bias is not None: + bias = bias.detach().cpu().numpy() + + layer = ctx.network.add_convolution_nd( + input=input_trt, + num_output_maps=out_channels, + kernel_shape=kernel_size, + kernel=kernel, + bias=bias) + layer.stride_nd = stride + layer.padding_nd = padding + layer.dilation_nd = dilation + + if groups is not None: + layer.num_groups = groups + + output._trt = layer.get_output(0) + + +class FunctionalConv2d(torch.nn.Module): + def __init__(self, *args, **kwargs): + super().__init__() + self.conv = torch.nn.Conv2d(*args, **kwargs) + + def forward(self, x): + x = 
torch.nn.functional.conv2d( + x, + self.conv.weight, + self.conv.bias, + self.conv.stride, + self.conv.padding, + self.conv.dilation, + self.conv.groups + ) + return x + +class FunctionalConv3d(torch.nn.Module): + def __init__(self, *args, **kwargs): + super().__init__() + self.conv = torch.nn.Conv3d(*args, **kwargs) + + def forward(self, x): + x = torch.nn.functional.conv3d( + x, + self.conv.weight, + self.conv.bias, + self.conv.stride, + self.conv.padding, + self.conv.dilation, + self.conv.groups + ) + return x + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)], enabled=trt_version() >= '7.0') +def test_Conv2d_basic_trt7_functional(): + return FunctionalConv2d(10, 5, kernel_size=1, stride=1, padding=0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)], enabled=trt_version() >= '7.0') +def test_Conv2d_stride2_trt7_functional(): + return FunctionalConv2d(10, 5, kernel_size=1, stride=2, padding=0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)], enabled=trt_version() >= '7.0') +def test_Conv2d_kernel3_trt7_functional(): + return FunctionalConv2d(10, 5, kernel_size=3, stride=2, padding=1) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)], enabled=trt_version() >= '7.0') +def test_Conv2d_dilation2_trt7_functional(): + return FunctionalConv2d(10, 5, kernel_size=3, stride=1, padding=1, dilation=2) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 64, 64, 64)], enabled=trt_version() >= '7.0') +def test_Conv3d_basic_trt7_functional(): + return FunctionalConv3d(10, 5, kernel_size=1, stride=1, padding=0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 64, 64, 64)], enabled=trt_version() >= '7.0') +def test_Conv3d_stride2_trt7_functional(): + return FunctionalConv3d(10, 5, kernel_size=1, stride=2, padding=0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 64, 64, 64)], enabled=trt_version() >= '7.0') +def test_Conv3d_kernel3_trt7_functional(): + return FunctionalConv3d(10, 5, kernel_size=3, stride=2, padding=1) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 64, 64, 64)], enabled=trt_version() >= '7.0') +def test_Conv3d_dilation2_trt7_functional(): + return FunctionalConv3d(10, 5, kernel_size=3, stride=1, padding=1, dilation=2) diff --git a/torch2trt/converters/view.py b/torch2trt/converters/view.py index 2608ed21..b60dabb4 100644 --- a/torch2trt/converters/view.py +++ b/torch2trt/converters/view.py @@ -7,6 +7,7 @@ @tensorrt_converter('torch.Tensor.view') @tensorrt_converter('torch.Tensor.squeeze') @tensorrt_converter('torch.Tensor.unsqueeze') +@tensorrt_converter('torch.Tensor.flatten') @tensorrt_converter('torch.squeeze') @tensorrt_converter('torch.unsqueeze') def convert_view(ctx): From 8f7f4f7aad6c4378feca90d0da85409cc14284e6 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Mon, 26 Apr 2021 14:31:38 -0700 Subject: [PATCH 347/355] added contributors page --- CONTRIBUTORS.md | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 CONTRIBUTORS.md diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md new file mode 100644 index 00000000..a9709002 --- /dev/null +++ b/CONTRIBUTORS.md @@ -0,0 +1,46 @@ +# Contributors + +Below is a list of developers who have contributed to torch2trt. This is also used to track contributors +who have agreed to torch2trt's Contributor License Agreement. 
+ + +## Becoming a Contributor + +If you've made a notable contribution to torch2trt and wish to be listed as a contributor, simply do the following. + +1. Modify ``CONTRIBUTORS.md`` and add your name with a hyperlink to your GitHub account. + + ```md + - [](https://github.com/) + ``` + +2. Stage the changes in a standalone commit + + ```md + git add CONTRIBUTORS.md + ``` + +3. Make a signed commit with the following message text + + ```md + git commit -S -m "Added to CONTRIBUTORS.md." + ``` + +## Signing Contributor License Agreement (CLA) + +In some instances, you may be requested to sign torch2trt's Contributor License Agreement (CLA). To do so, + +1. If you're not already listed as a contributor in CONTRIBUTORS.md, make a commit as described above to add yourself to CONTRIBUTORS.md + +2. Add the text ``(CLA)`` after your name in ``CONTRIBUTORS.md`` +3. Stage the changes in a standalone commit + + ```md + git add CONTRIBUTORS.md + ``` +4. Make a signed commit with the following text + + ```md + git commit -S -m "I have read and agree to the Contributor License Agreement as written in the file CLA.md of this project. Signed, " + ``` + From 2d6150aab1143c4652846ff3b6ebfe4be18a7ada Mon Sep 17 00:00:00 2001 From: John Welsh Date: Mon, 26 Apr 2021 14:44:57 -0700 Subject: [PATCH 348/355] Added John Welsh to CONTRIBUTORS.md --- CONTRIBUTORS.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index a9709002..0a986e68 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -3,12 +3,13 @@ Below is a list of developers who have contributed to torch2trt. This is also used to track contributors who have agreed to torch2trt's Contributor License Agreement. +- [John Welsh](https://github.com/jaybdub) ## Becoming a Contributor If you've made a notable contribution to torch2trt and wish to be listed as a contributor, simply do the following. -1. Modify ``CONTRIBUTORS.md`` and add your name with a hyperlink to your GitHub account. +1. Modify ``CONTRIBUTORS.md`` and add your name with a hyperlink to your GitHub account to the end of the contributors list. ```md - [](https://github.com/) @@ -23,7 +24,7 @@ If you've made a notable contribution to torch2trt and wish to be listed as a co 3. Make a signed commit with the following message text ```md - git commit -S -m "Added to CONTRIBUTORS.md." + git commit -m "Added to CONTRIBUTORS.md" ``` ## Signing Contributor License Agreement (CLA) From 1dc9ad1a46ee18f4cf947f30339f2c0a7ffeb0d4 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Mon, 26 Apr 2021 14:45:41 -0700 Subject: [PATCH 349/355] I have read and agree to the Contributor License Agreement as written in the file CLA.md of this project. Signed, John Welsh --- CONTRIBUTORS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 0a986e68..4c2d6431 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -3,7 +3,7 @@ Below is a list of developers who have contributed to torch2trt. This is also used to track contributors who have agreed to torch2trt's Contributor License Agreement. 
-- [John Welsh](https://github.com/jaybdub) +- [John Welsh](https://github.com/jaybdub) (CLA) ## Becoming a Contributor From 2653a712b3dc9ba2ed38e2637ed9f96d44c809f7 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 15 Jun 2021 19:03:43 +0000 Subject: [PATCH 350/355] added silu converter --- torch2trt/converters/__init__.py | 1 + torch2trt/converters/silu.py | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 torch2trt/converters/silu.py diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index c1ab931b..5ea23f9c 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -50,6 +50,7 @@ from .relu import * from .relu6 import * from .sigmoid import * +from .silu import * from .softmax import * from .split import * from .stack import * diff --git a/torch2trt/converters/silu.py b/torch2trt/converters/silu.py new file mode 100644 index 00000000..37d385c5 --- /dev/null +++ b/torch2trt/converters/silu.py @@ -0,0 +1,21 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +@tensorrt_converter('torch.nn.functional.silu') +def convert_silu(ctx): + input = get_arg(ctx, 'input', pos=0, default=None) + output = ctx.method_return + input_trt = add_missing_trt_tensors(ctx.network, [input])[0] + + layer = ctx.network.add_activation(input_trt, trt.ActivationType.SIGMOID) + layer = ctx.network.add_elementwise(input_trt, layer.get_output(0), trt.ElementWiseOperation.PROD) + + output._trt = layer.get_output(0) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3, 3)]) +def test_silu(): + return torch.nn.SiLU() \ No newline at end of file From fb67b42a77114937ce8be7acaa66fa1bd1a72c6d Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 15 Jun 2021 19:33:16 +0000 Subject: [PATCH 351/355] functional linear converter --- torch2trt/converters/Linear.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/torch2trt/converters/Linear.py b/torch2trt/converters/Linear.py index 323a474f..08d7d4a4 100644 --- a/torch2trt/converters/Linear.py +++ b/torch2trt/converters/Linear.py @@ -2,10 +2,11 @@ from torch2trt.module_test import add_module_test -@tensorrt_converter('torch.nn.Linear.forward') +@tensorrt_converter('torch.nn.functional.linear') def convert_Linear(ctx): - module = ctx.method_args[0] - input = ctx.method_args[1] + input = ctx.method_args[0] + weight = get_arg(ctx, 'weight', 1, None) + bias = get_arg(ctx, 'bias', 2, None) input_trt = add_missing_trt_tensors(ctx.network, [input])[0] output = ctx.method_return @@ -13,23 +14,23 @@ def convert_Linear(ctx): layer = ctx.network.add_shuffle(input_trt) layer.reshape_dims = tuple(input_trt.shape) + (1, 1) - bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype)) - if module.bias is not None: - bias = module.bias.detach().cpu().numpy() + bias_trt = trt.Weights(torch_dtype_to_trt(weight.dtype)) + if bias is not None: + bias_trt = bias.detach().cpu().numpy() # add fully connected layer = ctx.network.add_fully_connected( input=layer.get_output(0), - num_outputs=module.out_features, - kernel=module.weight.detach().cpu().numpy(), - bias=bias) + num_outputs=int(weight.shape[0]), + kernel=weight.detach().cpu().numpy(), + bias=bias_trt) # reshape back to N layer = ctx.network.add_shuffle(layer.get_output(0)) layer.reshape_dims = tuple(output.shape[1:]) output._trt 
= layer.get_output(0) - + @add_module_test(torch.float32, torch.device('cuda'), [(1, 10)]) @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10)]) @@ -42,4 +43,4 @@ def test_Linear_basic(): @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10)]) @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 10)]) def test_Linear_no_bias(): - return torch.nn.Linear(10, 5, bias=False) \ No newline at end of file + return torch.nn.Linear(10, 5, bias=False) From 5ee60339f828e35b60c7d6b7d2535d3d2a5e43c1 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Tue, 15 Jun 2021 21:07:39 +0000 Subject: [PATCH 352/355] added gelu converter --- CHANGELOG.md | 4 ++ torch2trt/converters/__init__.py | 1 + torch2trt/converters/gelu.py | 63 ++++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+) create mode 100644 torch2trt/converters/gelu.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e4af331..3ca436a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Master] +- Added converter for ``torch.nn.functional.gelu`` +- Added converter for ``torch.nn.functional.linear`` +- Added converter for ``torch.nn.functional.silu`` + ## [0.2.0] - 03/02/2021 - Added converter for ``torch.Tensor.flatten`` diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index 5ea23f9c..ac0ae37a 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -28,6 +28,7 @@ from .div import * from .expand import * from .floordiv import * +from .gelu import * from .getitem import * from .identity import * from .instance_norm import * diff --git a/torch2trt/converters/gelu.py b/torch2trt/converters/gelu.py new file mode 100644 index 00000000..7e350864 --- /dev/null +++ b/torch2trt/converters/gelu.py @@ -0,0 +1,63 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test +import math + + +@tensorrt_converter('torch.nn.functional.gelu') +def convert_gelu_v1(ctx): + # approximate equation 1 from paper + input = get_arg(ctx, 'input', 0, None) + output = ctx.method_return + + x, c05, c1, cs2pi, c044, c3 = add_missing_trt_tensors( + ctx.network, + [input, 0.5, 1.0, math.sqrt(2.0 / math.pi), 0.044715, 3.0] + ) + + x, c05, c1, cs2pi, c044, c3 = broadcast_trt_tensors( + ctx.network, + [x, c05, c1, cs2pi, c044, c3], + len(output.shape) - 1 + ) + + y = ctx.network.add_elementwise(x, c3, trt.ElementWiseOperation.POW).get_output(0) + y = ctx.network.add_elementwise(y, c044, trt.ElementWiseOperation.PROD).get_output(0) + y = ctx.network.add_elementwise(x, y, trt.ElementWiseOperation.SUM).get_output(0) + y = ctx.network.add_elementwise(y, cs2pi, trt.ElementWiseOperation.PROD).get_output(0) + y = ctx.network.add_activation(y, trt.ActivationType.TANH).get_output(0) + y = ctx.network.add_elementwise(y, c1, trt.ElementWiseOperation.SUM).get_output(0) + y = ctx.network.add_elementwise(x, y, trt.ElementWiseOperation.PROD).get_output(0) + y = ctx.network.add_elementwise(y, c05, trt.ElementWiseOperation.PROD).get_output(0) + + output._trt = y + + +# @tensorrt_converter('torch.nn.functional.gelu') +# def convert_gelu_v2(ctx): +# # approximate equation 1 from paper +# input = get_arg(ctx, 'input', 0, None) +# output = ctx.method_return + +# x, c1702 = add_missing_trt_tensors( +# ctx.network, +# [input, 1.702] +# ) + +# x, c1702 = broadcast_trt_tensors( +# ctx.network, +# [x, c1702], +# len(output.shape) - 1 +# ) + +# y = ctx.network.add_elementwise(x, c1702, trt.ElementWiseOperation.PROD).get_output(0) +# y = 
ctx.network.add_activation(y, trt.ActivationType.SIGMOID).get_output(0)
+# y = ctx.network.add_elementwise(x, y, trt.ElementWiseOperation.PROD).get_output(0)
+
+# output._trt = y
+
+
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 5)])
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)])
+@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3, 3)])
+def test_gelu():
+    return torch.nn.GELU()
\ No newline at end of file

From ddb355825bdf2841004c24b4b81349c77c7b168a Mon Sep 17 00:00:00 2001
From: John Welsh
Date: Tue, 15 Jun 2021 22:08:56 +0000
Subject: [PATCH 353/355] added layer_norm converter

---
 CHANGELOG.md                       |   1 +
 torch2trt/converters/__init__.py   |   3 +-
 torch2trt/converters/layer_norm.py | 103 +++++++++++++++++++++++++++++
 torch2trt/torch2trt.py             |  11 +++
 4 files changed, 117 insertions(+), 1 deletion(-)
 create mode 100644 torch2trt/converters/layer_norm.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3ca436a6..a69b7eb3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@
 
 ## [Master]
 
+- Added converter for ``torch.nn.functional.layer_norm``
 - Added converter for ``torch.nn.functional.gelu``
 - Added converter for ``torch.nn.functional.linear``
 - Added converter for ``torch.nn.functional.silu``
diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py
index ac0ae37a..b41253f9 100644
--- a/torch2trt/converters/__init__.py
+++ b/torch2trt/converters/__init__.py
@@ -30,10 +30,11 @@
 from .floordiv import *
 from .gelu import *
 from .getitem import *
+from .group_norm import *
 from .identity import *
 from .instance_norm import *
 from .interpolate import *
-from .group_norm import *
+from .layer_norm import *
 from .max import *
 from .max_pool2d import *
 from .mean import *
diff --git a/torch2trt/converters/layer_norm.py b/torch2trt/converters/layer_norm.py
new file mode 100644
index 00000000..ab6b699d
--- /dev/null
+++ b/torch2trt/converters/layer_norm.py
@@ -0,0 +1,103 @@
+from torch2trt.torch2trt import *
+from torch2trt.module_test import add_module_test
+
+
+@tensorrt_converter('torch.nn.functional.layer_norm')
+def convert_layernorm(ctx):
+    input = get_arg(ctx, 'input', 0, None)
+    shape = get_arg(ctx, 'normalized_shape', 1, None)
+    weight = get_arg(ctx, 'weight', 2, None)
+    bias = get_arg(ctx, 'bias', 3, None)
+    eps = get_arg(ctx, 'eps', 4, 1e-05)
+    output = ctx.method_return
+
+    input_trt, eps_trt = add_missing_trt_tensors(
+        ctx.network,
+        [input, eps]
+    )
+
+    input_trt, eps_trt = broadcast_trt_tensors(
+        ctx.network,
+        [input_trt, eps_trt],
+        len(output.shape) - 1
+    )
+
+    if weight is not None:
+        _, weight_trt = add_missing_trt_tensors(
+            ctx.network,
+            [input, weight]
+        )
+        _, weight_trt = broadcast_trt_tensors(
+            ctx.network,
+            [input_trt, weight_trt],
+            len(output.shape) - 1
+        )
+
+    if bias is not None:
+        _, bias_trt = add_missing_trt_tensors(
+            ctx.network,
+            [input, bias]
+        )
+        _, bias_trt = broadcast_trt_tensors(
+            ctx.network,
+            [input_trt, bias_trt],
+            len(output.shape) - 1
+        )
+
+    if isinstance(shape, int):
+        shape = (shape,)
+    dim = tuple([-i - 1 for i in range(len(shape))])
+    dim = torch_dim_resolve_negative(dim, len(input.shape))
+    axes = torch_dim_to_trt_axes(dim)
+
+    ux = ctx.network.add_reduce(input_trt, trt.ReduceOperation.AVG, axes, keep_dims=True).get_output(0)
+    numerator = ctx.network.add_elementwise(input_trt, ux, trt.ElementWiseOperation.SUB).get_output(0)
+    varx = ctx.network.add_elementwise(numerator, numerator, trt.ElementWiseOperation.PROD).get_output(0)
+    varx = ctx.network.add_reduce(varx, 
trt.ReduceOperation.AVG, axes, keep_dims=True).get_output(0) + denom = ctx.network.add_elementwise(varx, eps_trt, trt.ElementWiseOperation.SUM).get_output(0) + denom = ctx.network.add_unary(denom, trt.UnaryOperation.SQRT).get_output(0) + y = ctx.network.add_elementwise(numerator, denom, trt.ElementWiseOperation.DIV).get_output(0) + + if weight is not None: + y = ctx.network.add_elementwise(y, weight_trt, trt.ElementWiseOperation.PROD).get_output(0) + + if bias is not None: + y = ctx.network.add_elementwise(y, bias_trt, trt.ElementWiseOperation.SUM).get_output(0) + + output._trt = y + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 5, 3)]) +def test_layer_norm_1d(): + return torch.nn.LayerNorm(3) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 5, 3)]) +def test_layer_norm_2d(): + return torch.nn.LayerNorm((5, 3)) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 5, 3)]) +def test_layer_norm_3d(): + return torch.nn.LayerNorm((5, 5, 3)) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 5, 3)]) +def test_layer_norm_1d_nonaffine(): + return torch.nn.LayerNorm(3, elementwise_affine=False) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)]) +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 5, 3)]) +def test_layer_norm_2d_nonaffine(): + return torch.nn.LayerNorm((5, 3), elementwise_affine=False) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 5, 3)]) +def test_layer_norm_3d_nonaffine(): + return torch.nn.LayerNorm((5, 5, 3), elementwise_affine=False) \ No newline at end of file diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index cc628ede..6b153a02 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -87,6 +87,17 @@ def trt_num_outputs(engine): return count +def torch_dim_resolve_negative(dim, ndim): + if not isinstance(dim, tuple): + dim = (dim,) + pos = [] + for d in dim: + if d < 0: + d = ndim + d + pos.append(d) + return tuple(pos) + + def torch_dim_to_trt_axes(dim): """Converts torch dim, or tuple of dims to a tensorrt axes bitmask""" if not isinstance(dim, tuple): From 8ce974ce879ff061702e8221a3b79a0412159df6 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Wed, 16 Jun 2021 00:48:59 +0000 Subject: [PATCH 354/355] added converter for torch.einsum --- CHANGELOG.md | 1 + torch2trt/__init__.py | 1 + torch2trt/converters/__init__.py | 1 + torch2trt/converters/einsum.py | 65 +++++++++++++++++++++++ torch2trt/einsum_backend.py | 88 ++++++++++++++++++++++++++++++++ torch2trt/torch2trt.py | 9 +++- 6 files changed, 164 insertions(+), 1 deletion(-) create mode 100644 torch2trt/converters/einsum.py create mode 100644 torch2trt/einsum_backend.py diff --git a/CHANGELOG.md b/CHANGELOG.md index a69b7eb3..1645ae62 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## [Master] +- Added converter for ``torch.einsum`` - Added converter for ``torch.nn.functional.layer_norm`` - Added converter for ``torch.nn.functional.gelu`` - Added converter for ``torch.nn.functional.linear`` diff --git a/torch2trt/__init__.py b/torch2trt/__init__.py index fa80d89b..863817f6 100644 --- a/torch2trt/__init__.py +++ b/torch2trt/__init__.py @@ 
-1,5 +1,6 @@ from .torch2trt import * from .converters import * +from .einsum_backend import tensordot, transpose import tensorrt as trt diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index b41253f9..f4854475 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -26,6 +26,7 @@ from .clamp import * from .compare import * from .div import * +from .einsum import * from .expand import * from .floordiv import * from .gelu import * diff --git a/torch2trt/converters/einsum.py b/torch2trt/converters/einsum.py new file mode 100644 index 00000000..f0215907 --- /dev/null +++ b/torch2trt/converters/einsum.py @@ -0,0 +1,65 @@ +from torch2trt.torch2trt import * +from torch2trt.module_test import add_module_test + + +def has_opt_einsum(): + try: + from opt_einsum import contract + return True + except: + return False + + +def einsum_remove_batch(expr): + expr = expr.replace(' ', '') + if '->' in expr: + ins, outs = expr.split('->') + # assume first dim is batch + ins = ','.join([x[1:] for x in ins.split(',')]) + outs = ','.join([x[1:] for x in outs.split(',')]) + + expr = '->'.join([ins, outs]) + else: + ins = expr + ins = ','.join([x[1:] for x in ins.split(',')]) + expr = ins + + return expr + + +@tensorrt_converter('torch.einsum', enabled=has_opt_einsum()) +def convert_einsum(ctx): + + from opt_einsum import contract + + equation = ctx.method_args[0] + operands = ctx.method_args[1:] + operands_trt = add_missing_trt_tensors( + ctx.network, + operands, + ) + outputs = ctx.method_return + equation = einsum_remove_batch(equation) + outputs_trt = contract(equation, *operands_trt, backend='torch2trt') + + if not isinstance(outputs, (tuple, list)): + outputs = (outputs,) + outputs_trt = (outputs_trt,) + + for out, out_trt in zip(outputs, outputs_trt): + out._trt = out_trt + + +class Einsum(torch.nn.Module): + + def __init__(self, expr): + super().__init__() + self.expr = expr + + def forward(self, *args): + return torch.einsum(self.expr, *args) + + +@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4), (1, 4, 5)], enabled=has_opt_einsum()) +def test_einsum(): + return Einsum('bij,bjk->bik') \ No newline at end of file diff --git a/torch2trt/einsum_backend.py b/torch2trt/einsum_backend.py new file mode 100644 index 00000000..1628eb72 --- /dev/null +++ b/torch2trt/einsum_backend.py @@ -0,0 +1,88 @@ +from .torch2trt import * +import numpy as np + + +def tensordot(a, b, axes=2): + try: + iter(axes) + except Exception: + axes_a = list(range(-axes, 0)) + axes_b = list(range(0, axes)) + else: + axes_a, axes_b = axes + try: + na = len(axes_a) + axes_a = list(axes_a) + except TypeError: + axes_a = [axes_a] + na = 1 + try: + nb = len(axes_b) + axes_b = list(axes_b) + except TypeError: + axes_b = [axes_b] + nb = 1 + +# a, b = np.asarray(a), np.asarray(b) + as_ = a.shape + nda = len(as_) #a.ndim + bs = b.shape + ndb = len(bs) #b.ndim + equal = True + if na != nb: + equal = False + else: + for k in range(na): + if as_[axes_a[k]] != bs[axes_b[k]]: + equal = False + break + if axes_a[k] < 0: + axes_a[k] += nda + if axes_b[k] < 0: + axes_b[k] += ndb + if not equal: + raise ValueError("shape-mismatch for sum") + + # Move the axes to sum over to the end of "a" + # and to the front of "b" + notin = [k for k in range(nda) if k not in axes_a] + newaxes_a = notin + axes_a + N2 = 1 + for axis in axes_a: + N2 *= as_[axis] + newshape_a = (int(np.multiply.reduce([as_[ax] for ax in notin])), N2) + olda = [as_[axis] for axis in notin] + + notin = [k for k in 
range(ndb) if k not in axes_b] + newaxes_b = axes_b + notin + N2 = 1 + for axis in axes_b: + N2 *= bs[axis] + newshape_b = (N2, int(np.multiply.reduce([bs[ax] for ax in notin]))) + oldb = [bs[axis] for axis in notin] + + network = active_context().network + + at = network.add_shuffle(a) + at.first_transpose = newaxes_a + at.reshape_dims = newshape_a + at = at.get_output(0) + + bt = network.add_shuffle(b) + bt.first_transpose = newaxes_b + bt.reshape_dims = newshape_b + bt = bt.get_output(0) + +# at = a.transpose(newaxes_a).reshape(newshape_a) +# bt = b.transpose(newaxes_b).reshape(newshape_b) + res = network.add_matrix_multiply(at, trt.MatrixOperation.NONE, bt, trt.MatrixOperation.NONE).get_output(0) + res = network.add_shuffle(res) + res.reshape_dims = olda + oldb + return res.get_output(0) + + +def transpose(x, axes=None): + network = active_context().network + xt = network.add_shuffle(x) + xt.first_transpose = axes + return xt.get_output(0) diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index 6b153a02..6c014721 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -370,9 +370,14 @@ def wrapper(*args, **kwargs): else: return attr +def active_context(): + return ConversionContext._ACTIVE_CONTEXT + class ConversionContext(object): + _ACTIVE_CONTEXT = None + def __init__(self, network, converters=CONVERTERS, torch2trt_kwargs=None): self.network = LayerNamingNetworkWrapper(self, network) self.lock = False @@ -388,12 +393,14 @@ def __init__(self, network, converters=CONVERTERS, torch2trt_kwargs=None): def __enter__(self): for hook in self.hooks: hook.__enter__() + ConversionContext._ACTIVE_CONTEXT = self return self def __exit__(self, type, val, tb): for hook in self.hooks: hook.__exit__(type, val, tb) - + ConversionContext._ACTIVE_CONTEXT = None + def add_inputs(self, torch_inputs, names=None): if names is None: names = default_input_names(len(torch_inputs)) From 2e4ccd35d6f4973801574731da732ebf18828800 Mon Sep 17 00:00:00 2001 From: John Welsh Date: Wed, 16 Jun 2021 19:43:53 +0000 Subject: [PATCH 355/355] einsum --- torch2trt/converters/__init__.py | 2 +- torch2trt/converters/{einsum.py => einsum_converter.py} | 0 torch2trt/torch2trt.py | 6 +++--- 3 files changed, 4 insertions(+), 4 deletions(-) rename torch2trt/converters/{einsum.py => einsum_converter.py} (100%) diff --git a/torch2trt/converters/__init__.py b/torch2trt/converters/__init__.py index f4854475..67bfe6c9 100644 --- a/torch2trt/converters/__init__.py +++ b/torch2trt/converters/__init__.py @@ -26,7 +26,7 @@ from .clamp import * from .compare import * from .div import * -from .einsum import * +from .einsum_converter import * from .expand import * from .floordiv import * from .gelu import * diff --git a/torch2trt/converters/einsum.py b/torch2trt/converters/einsum_converter.py similarity index 100% rename from torch2trt/converters/einsum.py rename to torch2trt/converters/einsum_converter.py diff --git a/torch2trt/torch2trt.py b/torch2trt/torch2trt.py index 6c014721..0c789bd9 100644 --- a/torch2trt/torch2trt.py +++ b/torch2trt/torch2trt.py @@ -329,7 +329,7 @@ def __enter__(self): ) ) - def __exit__(self, type, val, tb): + def __exit__(self, *args, **kwargs): self._set_method(self.converter['method_impl']) def default_input_names(num_inputs): @@ -396,9 +396,9 @@ def __enter__(self): ConversionContext._ACTIVE_CONTEXT = self return self - def __exit__(self, type, val, tb): + def __exit__(self, *args, **kwargs): for hook in self.hooks: - hook.__exit__(type, val, tb) + hook.__exit__(*args, **kwargs) 
ConversionContext._ACTIVE_CONTEXT = None def add_inputs(self, torch_inputs, names=None):
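
A note on how the einsum patches above fit together: `opt_einsum` resolves an unrecognized backend name such as `backend='torch2trt'` by importing the module of that name and looking up array primitives (`tensordot`, `transpose`) on it. That is why `torch2trt/__init__.py` re-exports `tensordot` and `transpose` from `einsum_backend`, and why those functions fetch the TensorRT network through the module-level `active_context()` singleton rather than taking a `ctx` argument: `opt_einsum` calls them with TRT tensors only. Below is a minimal usage sketch, assuming `opt_einsum` and TensorRT are installed; the expression and input shapes mirror `test_einsum` from [PATCH 354/355], while the module and variable names are purely illustrative.

```python
import torch
from torch2trt import torch2trt


class BatchedMatmul(torch.nn.Module):
    """Illustrative module whose forward pass routes through torch.einsum."""

    def forward(self, a, b):
        # The leading dimension of every operand is the batch dimension;
        # convert_einsum strips it from the expression (einsum_remove_batch)
        # before opt_einsum plans the contraction.
        return torch.einsum('bij,bjk->bik', a, b)


model = BatchedMatmul().cuda().eval()
a = torch.randn(1, 3, 4).cuda()
b = torch.randn(1, 4, 5).cuda()

# While tracing, convert_einsum replays the contraction with
# contract(..., backend='torch2trt'); opt_einsum expresses it through the
# tensordot/transpose primitives above, which emit TensorRT shuffle and
# matrix-multiply layers on the active network.
model_trt = torch2trt(model, [a, b])

print(torch.max(torch.abs(model(a, b) - model_trt(a, b))))
```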