diff --git a/Readme.md b/Readme.md
index b901525a..5817218f 100644
--- a/Readme.md
+++ b/Readme.md
@@ -147,32 +147,42 @@ To see torch-ort in action, see https://github.com/microsoft/onnxruntime-trainin
# Accelerate inference for PyTorch models with ONNX Runtime (Preview)
-ONNX Runtime for PyTorch accelerates PyTorch model inference using ONNX Runtime.
+ONNX Runtime for PyTorch is now extended to accelerate PyTorch model inference using ONNX Runtime.
-It is available via the torch-ort-inference python package. This preview package enables OpenVINO™ Execution Provider for ONNX Runtime by default for accelerating inference on various Intel CPUs and integrated GPUs.
+It is available via the torch-ort-inference Python package. This preview package enables the OpenVINO™ Execution Provider for ONNX Runtime by default to accelerate inference on various Intel® CPUs, Intel® integrated GPUs, and Intel® Movidius™ Vision Processing Units (VPUs).
This repository contains the source code for the package, as well as instructions for running the package.
+## Prerequisites
+
+- Ubuntu 18.04, 20.04
+
+- Python* 3.7, 3.8 or 3.9
+
## Install in a local Python environment
By default, torch-ort-inference depends on PyTorch 1.12 and ONNX Runtime OpenVINO EP 1.12.
-Install torch-ort-inference with OpenVINO dependencies
+1. Install torch-ort-inference with OpenVINO dependencies.
-- `pip install torch-ort-inference[openvino]`
+ - `pip install torch-ort-inference[openvino]`
+
+2. Run the post-installation script.
-## Verify your installation
+ - `python -m torch_ort.configure`
-Once you have created your environment, using Python, execute the following steps to validate that your installation is correct.
+## Verify your installation
-1. Download a inference script
+Once you have created your environment, execute the following steps to validate that your installation is correct.
- - `wget https://raw.githubusercontent.com/pytorch/ort/main/torch_ort_inference/tests/bert_for_sequence_classification.py`
+1. Clone this repo
+ - `git clone git@github.com:pytorch/ort.git`
+
2. Install extra dependencies
- `pip install wget pandas transformers`
-
+
3. Run the inference script
- `python ./ort/torch_ort_inference/tests/bert_for_sequence_classification.py`
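+
+   To override the default CPU/FP32 execution, the script also accepts provider options on the command line (a sketch; the flags mirror the script's argument parser and assume an Intel® integrated GPU is available):
+
+   - `python ./ort/torch_ort_inference/tests/bert_for_sequence_classification.py --provider openvino --backend GPU --precision FP16`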
@@ -204,6 +214,11 @@ If no provider options are specified by user, OpenVINO™ Execution Provider is
backend = "CPU"
precision = "FP32"
```
+For more details on APIs, see [usage.md](/torch_ort_inference/docs/usage.md).
+
+### Note
+
+Currently, only vision models are supported on Intel® VPUs. Support for NLP models may be added in future releases.
## License
diff --git a/torch_ort_inference/docs/install.md b/torch_ort_inference/docs/install.md
index e8a215f9..c87601c3 100644
--- a/torch_ort_inference/docs/install.md
+++ b/torch_ort_inference/docs/install.md
@@ -2,6 +2,12 @@
You can install and run torch-ort-inference in your local environment.
+## Prerequisites
+
+- Ubuntu 18.04, 20.04
+
+- Python* 3.7, 3.8 or 3.9
+
## Run in a Python environment
### Default dependencies
diff --git a/torch_ort_inference/docs/usage.md b/torch_ort_inference/docs/usage.md
new file mode 100644
index 00000000..2dbe15ab
--- /dev/null
+++ b/torch_ort_inference/docs/usage.md
@@ -0,0 +1,42 @@
+# APIs for OpenVINO™ integration with TorchORT
+
+This document describes the Python APIs available for OpenVINO™ integration with TorchORT to accelerate inference for PyTorch models on various Intel® hardware.
+
+## Essential APIs
+
+To add the OpenVINO™ integration with TorchORT package to your PyTorch application, add the following two lines of code:
+
+```python
+from torch_ort import ORTInferenceModule
+model = ORTInferenceModule(model)
+```
+
+By default, the CPU backend with FP32 precision is enabled. You can select a different backend and a supported precision using OpenVINOProviderOptions, as shown below:
+
+```python
+provider_options = OpenVINOProviderOptions(backend = "GPU", precision = "FP16")
+model = ORTInferenceModule(model, provider_options = provider_options)
+```
+
+Supported backend-precision combinations:
+
+| Backend | Precision |
+| --------| --------- |
+| CPU | FP32 |
+| GPU | FP32 |
+| GPU | FP16 |
+| MYRIAD | FP16 |
+
+## Additional APIs
+
+To save the inline-exported ONNX model, use DebugOptions as shown below:
+
+```python
+debug_options = DebugOptions(save_onnx=True, onnx_prefix='')
+model = ORTInferenceModule(model, debug_options=debug_options)
+```
+
+To enable verbose logging of the TorchORT pipeline execution, use DebugOptions as shown below:
+
+```python
+debug_options = DebugOptions(log_level=LogLevel.VERBOSE)
+model = ORTInferenceModule(model, debug_options=debug_options)
+```
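+
+Putting it together, below is a minimal end-to-end inference sketch. It assumes torchvision is installed and loosely mirrors the ResNet test script in this repository; it is illustrative rather than a prescribed workflow.
+
+```python
+import torch
+import torchvision.models as models
+from torch_ort import ORTInferenceModule
+
+# Wrap a pretrained model for ONNX Runtime inference (CPU backend, FP32 precision by default)
+model = models.resnet50(pretrained=True)
+model = ORTInferenceModule(model)
+model.eval()
+
+# Dummy input with a batch dimension; a real application would preprocess an actual image
+dummy_input = torch.randn(1, 3, 224, 224)
+with torch.no_grad():
+    outputs = model(dummy_input)
+
+# outputs[0] holds the logits for the single image in the batch
+print(outputs[0].shape)
+```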
diff --git a/torch_ort_inference/tests/bert_for_sequence_classification.py b/torch_ort_inference/tests/bert_for_sequence_classification.py
index 6e8203ed..e9f3ce46 100644
--- a/torch_ort_inference/tests/bert_for_sequence_classification.py
+++ b/torch_ort_inference/tests/bert_for_sequence_classification.py
@@ -8,6 +8,7 @@
import numpy as np
import time
import pandas as pd
+import pathlib
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
@@ -16,30 +17,58 @@
from torch_ort import ORTInferenceModule, OpenVINOProviderOptions
ov_backend_precisions = {"CPU": ["FP32"], "GPU": ["FP32", "FP16"]}
-
+inference_execution_providers = ["openvino"]
+
def preprocess_input(tokenizer, sentences):
# Tokenization & Input Formatting
# Config: "do_lower_case": true, "model_max_length": 512
inputs = []
+ MAX_LEN = 64
+
for sentence in sentences:
- tokenized_inputs = tokenizer(
- sentence,
- return_tensors="pt",
- padding='max_length',
- truncation=True)
- inputs.append(tokenized_inputs)
+ # `encode` will:
+ # (1) Tokenize the sentence.
+ # (2) Prepend the `[CLS]` token to the start.
+ # (3) Append the `[SEP]` token to the end.
+ # (4) Map tokens to their IDs.
+ encoded_sent = tokenizer.encode(
+ sentence, # Sentence to encode.
+ add_special_tokens = True, # Add '[CLS]' and '[SEP]'
+ )
+
+ # Pad our input tokens with value 0.
+ if len(encoded_sent) < MAX_LEN:
+ encoded_sent.extend([0]*(MAX_LEN-len(encoded_sent)))
+
+ # Truncate to MAX_LEN
+ if len(encoded_sent) > MAX_LEN:
+ print("WARNING: During preprocessing, number of tokens for the sentence {}"\
+ "exceedeed MAX LENGTH {}. This might impact accuracy of the results".format(
+ sentence,
+ MAX_LEN
+ ))
+ encoded_sent = encoded_sent[:MAX_LEN]
+
+ # Create the attention mask.
+ # - If a token ID is 0, then it's padding, set the mask to 0.
+ # - If a token ID is > 0, then it's a real token, set the mask to 1.
+ att_mask = [int(token_id > 0) for token_id in encoded_sent]
+
+ # Store the input ids and attention masks for the sentence.
+ inputs.append({'input_ids': torch.unsqueeze(torch.tensor(encoded_sent),0),
+ 'attention_mask': torch.unsqueeze(torch.tensor(att_mask),0)})
return inputs
-def infer(model, tokenizer, inputs):
+def infer(model, sentences, inputs):
+ num_sentences = len(sentences)
total_infer_time = 0
results = {}
# Run inference
- for i in range(len(inputs)):
+ for i in range(num_sentences):
input_ids = (inputs[i])['input_ids']
attention_masks = (inputs[i])['attention_mask']
with torch.no_grad():
@@ -47,7 +76,6 @@ def infer(model, tokenizer, inputs):
if i == 0:
t0 = time.time()
model(input_ids, attention_masks)
- print("warm up time:", time.time()-t0)
# infer
t0 = time.time()
outputs = model(input_ids, attention_masks)
@@ -63,18 +91,21 @@ def infer(model, tokenizer, inputs):
# predictions
pred_flat = np.argmax(logits, axis=1).flatten()
- orig_sent = tokenizer.decode(input_ids[0],skip_special_tokens=True)
+ orig_sent = sentences[i]
results[orig_sent] = pred_flat[0]
- print("\n Top (20) Results: \n")
+ print("\n Number of sentences: {}".format(num_sentences))
+ if num_sentences > 20:
+ print(" First 20 results:")
+ print("\t Grammar correctness label (0=unacceptable, 1=acceptable)\n")
count = 0
for k, v in results.items():
print("\t{!r} : {!r}".format(k, v))
if count == 20:
break
count = count + 1
- print("\nInference time: {:.4f}s".format(total_infer_time))
-
+ print("\n Average inference time: {:.4f}ms".format((total_infer_time/num_sentences)*1000))
+ print(" Total Inference time: {:.4f}ms".format(total_infer_time * 1000))
def main():
# 1. Basic setup
@@ -85,7 +116,7 @@ def main():
"--pytorch-only",
action="store_true",
default=False,
- help="disables ONNX Runtime",
+ help="disables ONNX Runtime inference",
)
parser.add_argument(
"--input",
@@ -119,25 +150,59 @@ def main():
if not args.pytorch_only:
if args.provider is None:
print("OpenVINOExecutionProvider is enabled with CPU and FP32 by default.")
+ if args.backend or args.precision:
+ raise ValueError("Provider not specified!! Please specify provider arg along with backend and precision.")
elif args.provider == "openvino":
if args.backend and args.precision:
if args.backend not in list(ov_backend_precisions.keys()):
- raise Exception(
- "Invalid backend. Valid values are:",
- list(ov_backend_precisions.keys()),
- )
+ raise ValueError(
+ "Invalid backend. Valid values are: {}".format(
+ list(ov_backend_precisions.keys())))
if args.precision not in ov_backend_precisions[args.backend]:
- raise Exception("Invalid precision for provided backend. Valid values are:",
- list(ov_backend_precisions[args.backend]))
- else:
- print(
- "OpenVINOExecutionProvider is enabled with CPU and FP32 by default."
- + " Please specify both backend and precision to override.\n"
+ raise ValueError("Invalid precision for provided backend. Valid values are: {}".format(
+ list(ov_backend_precisions[args.backend])))
+ elif args.backend or args.precision:
+ raise ValueError(
+ "Please specify both backend and precision to override default options.\n"
)
+ else:
+ print("OpenVINOExecutionProvider is enabled with CPU and FP32 by default.")
else:
- raise Exception("Invalid execution provider!!")
+ raise ValueError("Invalid execution provider!! Available providers are: {}".format(inference_execution_providers))
+ else:
+ print("ONNXRuntime inference is disabled.")
+ if args.provider or args.precision or args.backend:
+ raise ValueError("provider, backend, precision arguments are not applicable for --pytorch-only option.")
+
+ # 2. Read input sentence(s)
+    # Input can be a single sentence or a list of sentences in a .tsv file.
+ if args.input and args.input_file:
+ raise ValueError("Please provide either input or input file for inference.")
- # 2. Load Model
+ if args.input is not None:
+ sentences = [args.input]
+ elif args.input_file is not None:
+ file_name = args.input_file
+ if not os.path.exists(file_name):
+ raise ValueError("Invalid input file path: %s" % file_name)
+ if os.stat(file_name).st_size == 0:
+ raise ValueError("Input file is empty!!")
+ name, ext = os.path.splitext(file_name)
+ if ext != ".tsv":
+ raise ValueError("Invalid input file format. Please provide .tsv file.")
+ df = pd.read_csv(
+ file_name,
+ delimiter="\t",
+ header=None,
+ names=["Id", "Sentence"],
+ skiprows=1,
+ )
+ sentences = df.Sentence.values
+ else:
+ print("Input not provided! Using default input...")
+ sentences = ["This is a BERT sample.","User input is valid not."]
+
+ # 3. Load Model
# Pretrained model fine-tuned on CoLA dataset from huggingface model hub to predict grammar correctness
model = AutoModelForSequenceClassification.from_pretrained(
"textattack/bert-base-uncased-CoLA"
@@ -155,31 +220,12 @@ def main():
# Convert model for evaluation
model.eval()
- # 3. Read input sentence(s)
- # Input can be a single sentence, list of single sentences in a .tsv file.
- if args.input is not None:
- sentences = [args.input]
- elif args.input_file is not None:
- if not os.path.exists(args.input_file):
- raise ValueError("Invalid input file path: %s" % args.input_file)
- df = pd.read_csv(
- args.input_file,
- delimiter="\t",
- header=None,
- names=["Id", "Sentence"],
- skiprows=1,
- )
- sentences = df.Sentence.values
- else:
- print("Input not provided! Using default input...")
- sentences = ["This is a sample input."]
-
# 4. Load Tokenizer & Preprocess input sentences
tokenizer = AutoTokenizer.from_pretrained("textattack/bert-base-uncased-CoLA")
inputs = preprocess_input(tokenizer, sentences)
# 5. Infer
- infer(model, tokenizer, inputs)
+ infer(model, sentences, inputs)
if __name__ == "__main__":
diff --git a/torch_ort_inference/tests/resnet_image_classification.py b/torch_ort_inference/tests/resnet_image_classification.py
index 3b950a24..a6896d6a 100644
--- a/torch_ort_inference/tests/resnet_image_classification.py
+++ b/torch_ort_inference/tests/resnet_image_classification.py
@@ -1,163 +1,167 @@
-# -------------------------------------------------------------------------
-# Copyright (C) 2022 Intel Corporation
-# Licensed under the MIT License
-# --------------------------------------------------------------------------
-
-import os
-import time
-import torch
-import wget
-import argparse
-from PIL import Image
-from torchvision import transforms
-import torchvision.models as models
-from torch_ort import (
- ORTInferenceModule,
- OpenVINOProviderOptions,
-)
-
-ov_backend_precisions = {"CPU": ["FP32"], "GPU": ["FP32", "FP16"], "MYRIAD": ["FP16"]}
-
-def download_labels(labels):
- if not labels:
- labels = "imagenet_classes.txt"
- if not os.path.exists(labels):
- labelsUrl = (
- "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt"
- )
- # Download the file (if we haven't already)
- wget.download(labelsUrl)
- else:
- print("\nReusing downloaded imagenet labels")
-
- # Read the categories
- with open(labels, "r") as f:
- categories = [s.strip() for s in f.readlines()]
- return categories
-
-
-def preprocess(img):
- transform = transforms.Compose(
- [
- transforms.Resize(256),
- transforms.CenterCrop(224),
- transforms.ToTensor(),
- transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
- ]
- )
- return transform(img)
-
-
-def infer(model, image, categories):
- # warmup
- model(image)
-
- # Start inference
- t0 = time.time()
- outputs = model(image)
- t1 = time.time() - t0
- print("\nInference time: {:.4f}ms\n".format(t1 * 1000))
-
- # The output has unnormalized scores. Run a softmax on it for probabilities.
- probabilities = torch.nn.functional.softmax(outputs[0], dim=0)
-
- # Show top categories per image
- top5_prob, top5_catid = torch.topk(probabilities, 5)
- print("Top 5 Results: \nLabels , Probabilities:")
- for i in range(top5_prob.size(0)):
- print(categories[top5_catid[i]], top5_prob[i].item())
-
-
-def main():
- # 1. Basic setup
- parser = argparse.ArgumentParser(description="PyTorch Image Classification Example")
-
- parser.add_argument(
- "--pytorch-only",
- action="store_true",
- default=False,
- help="disables ONNX Runtime",
- )
- parser.add_argument(
- "--labels",
- type=str,
- help="path to labels file")
- parser.add_argument(
- "--input-file",
- type=str,
- required=True,
- help="path to input image file"
- )
- parser.add_argument(
- "--provider",
- type=str,
- help="ONNX Runtime Execution Provider",
- )
- parser.add_argument(
- "--backend",
- type=str,
- help="OpenVINO target device (CPU, GPU or MYRIAD)"
- )
- parser.add_argument(
- "--precision",
- type=str,
- help="OpenVINO target device precision (FP16 or FP32)"
- )
-
- args = parser.parse_args()
-
- # parameters validation
- if not args.pytorch_only:
- if args.provider is None:
- print("OpenVINOExecutionProvider is enabled with CPU and FP32 by default.")
- elif args.provider == "openvino":
- if args.backend and args.precision:
- if args.backend not in list(ov_backend_precisions.keys()):
- raise Exception(
- "Invalid backend. Valid values are:",
- list(ov_backend_precisions.keys()),
- )
- if args.precision not in ov_backend_precisions[args.backend]:
- raise Exception("Invalid precision for provided backend. Valid values are:",
- list(ov_backend_precisions[args.backend]))
- else:
- print(
- "OpenVINOExecutionProvider is enabled with CPU and FP32 by default."
- + " Please specify both backend and precision to override.\n"
- )
- else:
- raise Exception("Invalid execution provider!!")
-
- # 2. Download and load the model
- model = models.resnet50(pretrained=True)
- if not args.pytorch_only:
- if args.provider == "openvino" and (args.backend and args.precision):
- provider_options = OpenVINOProviderOptions(
- backend=args.backend, precision=args.precision
- )
- model = ORTInferenceModule(model, provider_options=provider_options)
- else:
- model = ORTInferenceModule(model)
-
- # Convert model for evaluation
- model.eval()
-
- # 3. Download ImageNet labels
- categories = download_labels(args.labels)
-
- # 4. Read input image file and preprocess
- if not args.input_file:
- raise ValueError("Path to input image not provided!")
- if not os.path.exists(args.input_file):
- raise ValueError("Invalid input file path")
- img = Image.open(args.input_file)
- img_trans = preprocess(img)
- # Adding batch dimension (size 1)
- img_trans = torch.unsqueeze(img_trans, 0)
-
- # 5. Infer
- infer(model, img_trans, categories)
-
-
-if __name__ == "__main__":
- main()
+# -------------------------------------------------------------------------
+# Copyright (C) 2022 Intel Corporation
+# Licensed under the MIT License
+# --------------------------------------------------------------------------
+
+import os
+import time
+import torch
+import wget
+import argparse
+from PIL import Image
+from torchvision import transforms
+import torchvision.models as models
+from torch_ort import ORTInferenceModule, OpenVINOProviderOptions
+
+ov_backend_precisions = {"CPU": ["FP32"], "GPU": ["FP32", "FP16"], "MYRIAD": ["FP16"]}
+inference_execution_providers = ["openvino"]
+
+def download_labels(labels):
+ if not labels:
+ labels = "imagenet_classes.txt"
+ if not os.path.exists(labels):
+ labelsUrl = (
+ "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt"
+ )
+ # Download the file (if we haven't already)
+ wget.download(labelsUrl)
+ else:
+ print("\nReusing downloaded imagenet labels")
+
+ # Read the categories
+ with open(labels, "r") as f:
+ categories = [s.strip() for s in f.readlines()]
+ return categories
+
+
+def preprocess(img):
+ transform = transforms.Compose(
+ [
+ transforms.Resize(256),
+ transforms.CenterCrop(224),
+ transforms.ToTensor(),
+ transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+ ]
+ )
+ return transform(img)
+
+
+def infer(model, image, categories):
+ # warmup
+ model(image)
+
+ # Start inference
+ t0 = time.time()
+ outputs = model(image)
+ t1 = time.time() - t0
+ print("\nInference time: {:.4f}ms\n".format(t1 * 1000))
+
+ # The output has unnormalized scores. Run a softmax on it for probabilities.
+ probabilities = torch.nn.functional.softmax(outputs[0], dim=0)
+
+ # Show top categories per image
+ top5_prob, top5_catid = torch.topk(probabilities, 5)
+ print("Top 5 Results: \nLabels , Probabilities:")
+ for i in range(top5_prob.size(0)):
+ print(categories[top5_catid[i]], top5_prob[i].item())
+
+
+def main():
+ # 1. Basic setup
+ parser = argparse.ArgumentParser(description="PyTorch Image Classification Example")
+
+ parser.add_argument(
+ "--pytorch-only",
+ action="store_true",
+ default=False,
+ help="disables ONNX Runtime inference",
+ )
+ parser.add_argument(
+ "--labels",
+ type=str,
+ help="path to labels file")
+ parser.add_argument(
+ "--input-file",
+ type=str,
+ required=True,
+ help="path to input image file"
+ )
+ parser.add_argument(
+ "--provider",
+ type=str,
+ help="ONNX Runtime Execution Provider",
+ )
+ parser.add_argument(
+ "--backend",
+ type=str,
+ help="OpenVINO target device (CPU, GPU or MYRIAD)"
+ )
+ parser.add_argument(
+ "--precision",
+ type=str,
+ help="OpenVINO target device precision (FP16 or FP32)"
+ )
+
+ args = parser.parse_args()
+
+ # parameters validation
+ if not args.pytorch_only:
+ if args.provider is None:
+ print("OpenVINOExecutionProvider is enabled with CPU and FP32 by default.")
+ if args.backend or args.precision:
+ raise ValueError("Provider not specified!! Please specify provider arg along with backend and precision.")
+ elif args.provider == "openvino":
+ if args.backend and args.precision:
+ if args.backend not in list(ov_backend_precisions.keys()):
+ raise ValueError(
+ "Invalid backend. Valid values are: {}".format(
+ list(ov_backend_precisions.keys())))
+ if args.precision not in ov_backend_precisions[args.backend]:
+ raise ValueError("Invalid precision for provided backend. Valid values are: {}".format(
+ list(ov_backend_precisions[args.backend])))
+ elif args.backend or args.precision:
+ raise ValueError(
+ "Please specify both backend and precision to override default options.\n"
+ )
+ else:
+ print("OpenVINOExecutionProvider is enabled with CPU and FP32 by default.")
+ else:
+ raise ValueError("Invalid execution provider!! Available providers are: {}".format(inference_execution_providers))
+ else:
+ print("ONNXRuntime inference is disabled.")
+ if args.provider or args.precision or args.backend:
+ raise ValueError("provider, backend, precision arguments are not applicable for --pytorch-only option.")
+
+ # 2. Read input image file and preprocess
+ if not args.input_file:
+ raise ValueError("Path to input image not provided!")
+ if not os.path.exists(args.input_file):
+ raise ValueError("Invalid input file path.")
+ img = Image.open(args.input_file)
+ img_trans = preprocess(img)
+ # Adding batch dimension (size 1)
+ img_trans = torch.unsqueeze(img_trans, 0)
+
+ # 3. Download and load the model
+ model = models.resnet50(pretrained=True)
+ if not args.pytorch_only:
+ if args.provider == "openvino" and (args.backend and args.precision):
+ provider_options = OpenVINOProviderOptions(
+ backend=args.backend, precision=args.precision
+ )
+ model = ORTInferenceModule(model, provider_options=provider_options)
+ else:
+ model = ORTInferenceModule(model)
+
+ # Convert model for evaluation
+ model.eval()
+
+ # 4. Download ImageNet labels
+ categories = download_labels(args.labels)
+
+ # 5. Infer
+ infer(model, img_trans, categories)
+ img.close()
+
+if __name__ == "__main__":
+ main()
diff --git a/torch_ort_inference/torch_ort/ortinferencemodule/ortinferencemodule.py b/torch_ort_inference/torch_ort/ortinferencemodule/ortinferencemodule.py
index 3d0fd066..dbfa6543 100644
--- a/torch_ort_inference/torch_ort/ortinferencemodule/ortinferencemodule.py
+++ b/torch_ort_inference/torch_ort/ortinferencemodule/ortinferencemodule.py
@@ -106,8 +106,7 @@ def _forward_call(self, *inputs, **kwargs):
# Use IO binding
onnx_input_names = [inp.name for inp in self._onnx_models.exported_model.graph.input]
- input_info = _io.parse_inputs_for_onnx_export(self._module_parameters, None, schema, inputs, kwargs)
- inputs = _utils_infer.get_user_inputs(onnx_input_names, input_info, inputs, kwargs, self._device)
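+        # Reuse the input info stored on the flattened module instead of re-parsing the inputs on every call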
+ inputs = _utils_infer.get_user_inputs(onnx_input_names, self._flattened_module._input_info, inputs, kwargs, self._device)
io_binding = self._inference_session.io_binding()
_utils._create_iobinding(io_binding, inputs, self._onnx_models.exported_model, self._device)