diff --git a/Readme.md b/Readme.md
index b901525a..5817218f 100644
--- a/Readme.md
+++ b/Readme.md
@@ -147,32 +147,42 @@ To see torch-ort in action, see https://github.com/microsoft/onnxruntime-trainin
 # Accelerate inference for PyTorch models with ONNX Runtime (Preview)
 
-ONNX Runtime for PyTorch accelerates PyTorch model inference using ONNX Runtime.
+ONNX Runtime for PyTorch is now extended to support PyTorch model inference using ONNX Runtime.
 
-It is available via the torch-ort-inference python package. This preview package enables OpenVINO™ Execution Provider for ONNX Runtime by default for accelerating inference on various Intel CPUs and integrated GPUs.
+It is available via the torch-ort-inference python package. This preview package enables OpenVINO™ Execution Provider for ONNX Runtime by default for accelerating inference on various Intel® CPUs, Intel® integrated GPUs, and Intel® Movidius™ Vision Processing Units - referred to as VPU.
 
 This repository contains the source code for the package, as well as instructions for running the package.
 
+## Prerequisites
+
+- Ubuntu 18.04, 20.04
+
+- Python* 3.7, 3.8 or 3.9
+
 ## Install in a local Python environment
 
 By default, torch-ort-inference depends on PyTorch 1.12 and ONNX Runtime OpenVINO EP 1.12.
 
-Install torch-ort-inference with OpenVINO dependencies
+1. Install torch-ort-inference with OpenVINO dependencies.
 
-- `pip install torch-ort-inference[openvino]`
+   - `pip install torch-ort-inference[openvino]`
+
+2. Run post-installation script
+
+   - `python -m torch_ort.configure`
 
 ## Verify your installation
 
-Once you have created your environment, using Python, execute the following steps to validate that your installation is correct.
+Once you have created your environment, execute the following steps to validate that your installation is correct.
 
-1. Download a inference script
+1. Clone this repo
 
-   - `wget https://raw.githubusercontent.com/pytorch/ort/main/torch_ort_inference/tests/bert_for_sequence_classification.py`
+   - `git clone git@github.com:pytorch/ort.git`
 
 2. Install extra dependencies
 
    - `pip install wget pandas transformers`
-
+
 
 3. Run the inference script
 
    - `python ./ort/torch_ort_inference/tests/bert_for_sequence_classification.py`
@@ -204,6 +214,11 @@ If no provider options are specified by user, OpenVINO™ Execution Provider is
 backend = "CPU"
 precision = "FP32"
 ```
+For more details on APIs, see [usage.md](/torch_ort_inference/docs/usage.md).
+
+### Note
+
+Currently, vision models are supported on Intel® VPUs. Support for NLP models may be added in future releases.
 
 ## License
diff --git a/torch_ort_inference/docs/install.md b/torch_ort_inference/docs/install.md
index e8a215f9..c87601c3 100644
--- a/torch_ort_inference/docs/install.md
+++ b/torch_ort_inference/docs/install.md
@@ -2,6 +2,12 @@
 
 You can install and run torch-ort-inference in your local environment.
 
+## Prerequisites
+
+- Ubuntu 18.04, 20.04
+
+- Python* 3.7, 3.8 or 3.9
+
 ## Run in a Python environment
 
 ### Default dependencies
diff --git a/torch_ort_inference/docs/usage.md b/torch_ort_inference/docs/usage.md
new file mode 100644
index 00000000..2dbe15ab
--- /dev/null
+++ b/torch_ort_inference/docs/usage.md
@@ -0,0 +1,42 @@
+# APIs for OpenVINO™ integration with TorchORT
+
+This document describes the available Python APIs for OpenVINO™ integration with TorchORT to accelerate inference for PyTorch models on various Intel hardware.
+
+## Essential APIs
+
+To add the OpenVINO™ integration with TorchORT package to your PyTorch application, add the following two lines of code:
+
+```python
+from torch_ort import ORTInferenceModule
+model = ORTInferenceModule(model)
+```
+
+By default, the CPU backend with FP32 precision is enabled. You can set a different backend and precision using OpenVINOProviderOptions, as shown below:
+
+```python
+provider_options = OpenVINOProviderOptions(backend = "GPU", precision = "FP16")
+model = ORTInferenceModule(model, provider_options = provider_options)
+```
+Supported backend-precision combinations:
+| Backend | Precision |
+| --------| --------- |
+| CPU | FP32 |
+| GPU | FP32 |
+| GPU | FP16 |
+| MYRIAD | FP16 |
+
+## Additional APIs
+
+To save the inline exported ONNX model, use DebugOptions as shown below:
+
+```python
+debug_options = DebugOptions(save_onnx=True, onnx_prefix='')
+model = ORTInferenceModule(model, debug_options=debug_options)
+```
+
+To enable verbose logging of the execution of the TorchORT pipeline, use DebugOptions as shown below:
+
+```python
+debug_options = DebugOptions(log_level=LogLevel.VERBOSE)
+model = ORTInferenceModule(model, debug_options=debug_options)
+```
diff --git a/torch_ort_inference/tests/bert_for_sequence_classification.py b/torch_ort_inference/tests/bert_for_sequence_classification.py
index 6e8203ed..e9f3ce46 100644
--- a/torch_ort_inference/tests/bert_for_sequence_classification.py
+++ b/torch_ort_inference/tests/bert_for_sequence_classification.py
@@ -8,6 +8,7 @@
 import numpy as np
 import time
 import pandas as pd
+import pathlib
 from transformers import AutoTokenizer
 from transformers import AutoModelForSequenceClassification
@@ -16,30 +17,58 @@ from torch_ort import ORTInferenceModule, OpenVINOProviderOptions
 
 ov_backend_precisions = {"CPU": ["FP32"], "GPU": ["FP32", "FP16"]}
-
+inference_execution_providers = ["openvino"]
 
 def preprocess_input(tokenizer, sentences):
     # Tokenization & Input Formatting
     # Config: "do_lower_case": true, "model_max_length": 512
     inputs = []
+    MAX_LEN = 64
+
     for sentence in sentences:
-        tokenized_inputs = tokenizer(
-            sentence,
-            return_tensors="pt",
-            padding='max_length',
-            truncation=True)
-        inputs.append(tokenized_inputs)
+        # `encode` will:
+        #   (1) Tokenize the sentence.
+        #   (2) Prepend the `[CLS]` token to the start.
+        #   (3) Append the `[SEP]` token to the end.
+        #   (4) Map tokens to their IDs.
+        encoded_sent = tokenizer.encode(
+            sentence,                    # Sentence to encode.
+            add_special_tokens = True,   # Add '[CLS]' and '[SEP]'
+        )
+
+        # Pad our input tokens with value 0.
+        if len(encoded_sent) < MAX_LEN:
+            encoded_sent.extend([0]*(MAX_LEN-len(encoded_sent)))
+
+        # Truncate to MAX_LEN
+        if len(encoded_sent) > MAX_LEN:
+            print("WARNING: During preprocessing, number of tokens for the sentence {} "\
+                "exceeded MAX LENGTH {}. This might impact accuracy of the results".format(
+                sentence,
+                MAX_LEN
+            ))
+            encoded_sent = encoded_sent[:MAX_LEN]
+
+        # Create the attention mask.
+        #   - If a token ID is 0, then it's padding, set the mask to 0.
+        #   - If a token ID is > 0, then it's a real token, set the mask to 1.
+        att_mask = [int(token_id > 0) for token_id in encoded_sent]
+
+        # Store the input ids and attention masks for the sentence.
+        inputs.append({'input_ids': torch.unsqueeze(torch.tensor(encoded_sent),0),
+                       'attention_mask': torch.unsqueeze(torch.tensor(att_mask),0)})
 
     return inputs
 
-def infer(model, tokenizer, inputs):
+def infer(model, sentences, inputs):
+    num_sentences = len(sentences)
     total_infer_time = 0
     results = {}
 
     # Run inference
-    for i in range(len(inputs)):
+    for i in range(num_sentences):
         input_ids = (inputs[i])['input_ids']
         attention_masks = (inputs[i])['attention_mask']
         with torch.no_grad():
@@ -47,7 +76,6 @@ def infer(model, tokenizer, inputs):
             if i == 0:
                 t0 = time.time()
                 model(input_ids, attention_masks)
-                print("warm up time:", time.time()-t0)
             # infer
             t0 = time.time()
             outputs = model(input_ids, attention_masks)
@@ -63,18 +91,21 @@ def infer(model, tokenizer, inputs):
 
         # predictions
         pred_flat = np.argmax(logits, axis=1).flatten()
-        orig_sent = tokenizer.decode(input_ids[0],skip_special_tokens=True)
+        orig_sent = sentences[i]
         results[orig_sent] = pred_flat[0]
 
-    print("\n Top (20) Results: \n")
+    print("\n Number of sentences: {}".format(num_sentences))
+    if num_sentences > 20:
+        print(" First 20 results:")
+    print("\t Grammar correctness label (0=unacceptable, 1=acceptable)\n")
     count = 0
     for k, v in results.items():
         print("\t{!r} : {!r}".format(k, v))
         if count == 20:
             break
         count = count + 1
-    print("\nInference time: {:.4f}s".format(total_infer_time))
-
+    print("\n Average inference time: {:.4f}ms".format((total_infer_time/num_sentences)*1000))
+    print(" Total Inference time: {:.4f}ms".format(total_infer_time * 1000))
 
 def main():
     # 1. Basic setup
@@ -85,7 +116,7 @@ def main():
         "--pytorch-only",
         action="store_true",
         default=False,
-        help="disables ONNX Runtime",
+        help="disables ONNX Runtime inference",
     )
     parser.add_argument(
         "--input",
@@ -119,25 +150,59 @@ def main():
     if not args.pytorch_only:
         if args.provider is None:
             print("OpenVINOExecutionProvider is enabled with CPU and FP32 by default.")
+            if args.backend or args.precision:
+                raise ValueError("Provider not specified!! Please specify provider arg along with backend and precision.")
         elif args.provider == "openvino":
             if args.backend and args.precision:
                 if args.backend not in list(ov_backend_precisions.keys()):
-                    raise Exception(
-                        "Invalid backend. Valid values are:",
-                        list(ov_backend_precisions.keys()),
-                    )
+                    raise ValueError(
+                        "Invalid backend. Valid values are: {}".format(
+                            list(ov_backend_precisions.keys())))
                 if args.precision not in ov_backend_precisions[args.backend]:
-                    raise Exception("Invalid precision for provided backend. Valid values are:",
-                        list(ov_backend_precisions[args.backend]))
-            else:
-                print(
-                    "OpenVINOExecutionProvider is enabled with CPU and FP32 by default."
-                    + " Please specify both backend and precision to override.\n"
+                    raise ValueError("Invalid precision for provided backend. Valid values are: {}".format(
+                        list(ov_backend_precisions[args.backend])))
+            elif args.backend or args.precision:
+                raise ValueError(
+                    "Please specify both backend and precision to override default options.\n"
                 )
+            else:
+                print("OpenVINOExecutionProvider is enabled with CPU and FP32 by default.")
         else:
-            raise Exception("Invalid execution provider!!")
+            raise ValueError("Invalid execution provider!! Available providers are: {}".format(inference_execution_providers))
+    else:
+        print("ONNXRuntime inference is disabled.")
+        if args.provider or args.precision or args.backend:
+            raise ValueError("provider, backend, precision arguments are not applicable for --pytorch-only option.")
+
+    # 2. Read input sentence(s)
+    # Input can be a single sentence, or a list of sentences in a .tsv file.
+    if args.input and args.input_file:
+        raise ValueError("Please provide either input or input file for inference.")
 
-    # 2. Load Model
+    if args.input is not None:
+        sentences = [args.input]
+    elif args.input_file is not None:
+        file_name = args.input_file
+        if not os.path.exists(file_name):
+            raise ValueError("Invalid input file path: %s" % file_name)
+        if os.stat(file_name).st_size == 0:
+            raise ValueError("Input file is empty!!")
+        name, ext = os.path.splitext(file_name)
+        if ext != ".tsv":
+            raise ValueError("Invalid input file format. Please provide .tsv file.")
+        df = pd.read_csv(
+            file_name,
+            delimiter="\t",
+            header=None,
+            names=["Id", "Sentence"],
+            skiprows=1,
+        )
+        sentences = df.Sentence.values
+    else:
+        print("Input not provided! Using default input...")
+        sentences = ["This is a BERT sample.","User input is valid not."]
+
+    # 3. Load Model
     # Pretrained model fine-tuned on CoLA dataset from huggingface model hub to predict grammar correctness
     model = AutoModelForSequenceClassification.from_pretrained(
         "textattack/bert-base-uncased-CoLA"
@@ -155,31 +220,12 @@ def main():
     # Convert model for evaluation
     model.eval()
 
-    # 3. Read input sentence(s)
-    # Input can be a single sentence, list of single sentences in a .tsv file.
-    if args.input is not None:
-        sentences = [args.input]
-    elif args.input_file is not None:
-        if not os.path.exists(args.input_file):
-            raise ValueError("Invalid input file path: %s" % args.input_file)
-        df = pd.read_csv(
-            args.input_file,
-            delimiter="\t",
-            header=None,
-            names=["Id", "Sentence"],
-            skiprows=1,
-        )
-        sentences = df.Sentence.values
-    else:
-        print("Input not provided! Using default input...")
-        sentences = ["This is a sample input."]
-
     # 4. Load Tokenizer & Preprocess input sentences
     tokenizer = AutoTokenizer.from_pretrained("textattack/bert-base-uncased-CoLA")
     inputs = preprocess_input(tokenizer, sentences)
 
     # 5. Infer
-    infer(model, tokenizer, inputs)
+    infer(model, sentences, inputs)
 
 
 if __name__ == "__main__":
diff --git a/torch_ort_inference/tests/resnet_image_classification.py b/torch_ort_inference/tests/resnet_image_classification.py
index 3b950a24..a6896d6a 100644
--- a/torch_ort_inference/tests/resnet_image_classification.py
+++ b/torch_ort_inference/tests/resnet_image_classification.py
@@ -1,163 +1,167 @@
-# -------------------------------------------------------------------------
-# Copyright (C) 2022 Intel Corporation
-# Licensed under the MIT License
-# --------------------------------------------------------------------------
-
-import os
-import time
-import torch
-import wget
-import argparse
-from PIL import Image
-from torchvision import transforms
-import torchvision.models as models
-from torch_ort import (
-    ORTInferenceModule,
-    OpenVINOProviderOptions,
-)
-
-ov_backend_precisions = {"CPU": ["FP32"], "GPU": ["FP32", "FP16"], "MYRIAD": ["FP16"]}
-
-def download_labels(labels):
-    if not labels:
-        labels = "imagenet_classes.txt"
-        if not os.path.exists(labels):
-            labelsUrl = (
-                "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt"
-            )
-            # Download the file (if we haven't already)
-            wget.download(labelsUrl)
-        else:
-            print("\nReusing downloaded imagenet labels")
-
-    # Read the categories
-    with open(labels, "r") as f:
-        categories = [s.strip() for s in f.readlines()]
-    return categories
-
-
-def preprocess(img):
-    transform = transforms.Compose(
-        [
-            transforms.Resize(256),
-            transforms.CenterCrop(224),
-            transforms.ToTensor(),
-            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
-        ]
-    )
-    return transform(img)
-
-
-def infer(model, image, categories):
-    # warmup
-    model(image)
-
-    # Start inference
-    t0 = time.time()
-    outputs = model(image)
-    t1 = time.time() - t0
-    print("\nInference time: {:.4f}ms\n".format(t1 * 1000))
-
-    # The output has unnormalized scores. Run a softmax on it for probabilities.
-    probabilities = torch.nn.functional.softmax(outputs[0], dim=0)
-
-    # Show top categories per image
-    top5_prob, top5_catid = torch.topk(probabilities, 5)
-    print("Top 5 Results: \nLabels , Probabilities:")
-    for i in range(top5_prob.size(0)):
-        print(categories[top5_catid[i]], top5_prob[i].item())
-
-
-def main():
-    # 1. Basic setup
-    parser = argparse.ArgumentParser(description="PyTorch Image Classification Example")
-
-    parser.add_argument(
-        "--pytorch-only",
-        action="store_true",
-        default=False,
-        help="disables ONNX Runtime",
-    )
-    parser.add_argument(
-        "--labels",
-        type=str,
-        help="path to labels file")
-    parser.add_argument(
-        "--input-file",
-        type=str,
-        required=True,
-        help="path to input image file"
-    )
-    parser.add_argument(
-        "--provider",
-        type=str,
-        help="ONNX Runtime Execution Provider",
-    )
-    parser.add_argument(
-        "--backend",
-        type=str,
-        help="OpenVINO target device (CPU, GPU or MYRIAD)"
-    )
-    parser.add_argument(
-        "--precision",
-        type=str,
-        help="OpenVINO target device precision (FP16 or FP32)"
-    )
-
-    args = parser.parse_args()
-
-    # parameters validation
-    if not args.pytorch_only:
-        if args.provider is None:
-            print("OpenVINOExecutionProvider is enabled with CPU and FP32 by default.")
-        elif args.provider == "openvino":
-            if args.backend and args.precision:
-                if args.backend not in list(ov_backend_precisions.keys()):
-                    raise Exception(
-                        "Invalid backend. Valid values are:",
-                        list(ov_backend_precisions.keys()),
-                    )
-                if args.precision not in ov_backend_precisions[args.backend]:
-                    raise Exception("Invalid precision for provided backend. Valid values are:",
-                        list(ov_backend_precisions[args.backend]))
-            else:
-                print(
-                    "OpenVINOExecutionProvider is enabled with CPU and FP32 by default."
-                    + " Please specify both backend and precision to override.\n"
-                )
-        else:
-            raise Exception("Invalid execution provider!!")
-
-    # 2. Download and load the model
-    model = models.resnet50(pretrained=True)
-    if not args.pytorch_only:
-        if args.provider == "openvino" and (args.backend and args.precision):
-            provider_options = OpenVINOProviderOptions(
-                backend=args.backend, precision=args.precision
-            )
-            model = ORTInferenceModule(model, provider_options=provider_options)
-        else:
-            model = ORTInferenceModule(model)
-
-    # Convert model for evaluation
-    model.eval()
-
-    # 3. Download ImageNet labels
-    categories = download_labels(args.labels)
-
-    # 4. Read input image file and preprocess
-    if not args.input_file:
-        raise ValueError("Path to input image not provided!")
-    if not os.path.exists(args.input_file):
-        raise ValueError("Invalid input file path")
-    img = Image.open(args.input_file)
-    img_trans = preprocess(img)
-    # Adding batch dimension (size 1)
-    img_trans = torch.unsqueeze(img_trans, 0)
-
-    # 5. Infer
-    infer(model, img_trans, categories)
-
-
-if __name__ == "__main__":
-    main()
+# -------------------------------------------------------------------------
+# Copyright (C) 2022 Intel Corporation
+# Licensed under the MIT License
+# --------------------------------------------------------------------------
+
+import os
+import time
+import torch
+import wget
+import argparse
+from PIL import Image
+from torchvision import transforms
+import torchvision.models as models
+from torch_ort import ORTInferenceModule, OpenVINOProviderOptions
+
+ov_backend_precisions = {"CPU": ["FP32"], "GPU": ["FP32", "FP16"], "MYRIAD": ["FP16"]}
+inference_execution_providers = ["openvino"]
+
+def download_labels(labels):
+    if not labels:
+        labels = "imagenet_classes.txt"
+        if not os.path.exists(labels):
+            labelsUrl = (
+                "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt"
+            )
+            # Download the file (if we haven't already)
+            wget.download(labelsUrl)
+        else:
+            print("\nReusing downloaded imagenet labels")
+
+    # Read the categories
+    with open(labels, "r") as f:
+        categories = [s.strip() for s in f.readlines()]
+    return categories
+
+
+def preprocess(img):
+    transform = transforms.Compose(
+        [
+            transforms.Resize(256),
+            transforms.CenterCrop(224),
+            transforms.ToTensor(),
+            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+        ]
+    )
+    return transform(img)
+
+
+def infer(model, image, categories):
+    # warmup
+    model(image)
+
+    # Start inference
+    t0 = time.time()
+    outputs = model(image)
+    t1 = time.time() - t0
+    print("\nInference time: {:.4f}ms\n".format(t1 * 1000))
+
+    # The output has unnormalized scores. Run a softmax on it for probabilities.
+    probabilities = torch.nn.functional.softmax(outputs[0], dim=0)
+
+    # Show top categories per image
+    top5_prob, top5_catid = torch.topk(probabilities, 5)
+    print("Top 5 Results: \nLabels , Probabilities:")
+    for i in range(top5_prob.size(0)):
+        print(categories[top5_catid[i]], top5_prob[i].item())
+
+
+def main():
+    # 1. Basic setup
+    parser = argparse.ArgumentParser(description="PyTorch Image Classification Example")
+
+    parser.add_argument(
+        "--pytorch-only",
+        action="store_true",
+        default=False,
+        help="disables ONNX Runtime inference",
+    )
+    parser.add_argument(
+        "--labels",
+        type=str,
+        help="path to labels file")
+    parser.add_argument(
+        "--input-file",
+        type=str,
+        required=True,
+        help="path to input image file"
+    )
+    parser.add_argument(
+        "--provider",
+        type=str,
+        help="ONNX Runtime Execution Provider",
+    )
+    parser.add_argument(
+        "--backend",
+        type=str,
+        help="OpenVINO target device (CPU, GPU or MYRIAD)"
+    )
+    parser.add_argument(
+        "--precision",
+        type=str,
+        help="OpenVINO target device precision (FP16 or FP32)"
+    )
+
+    args = parser.parse_args()
+
+    # parameters validation
+    if not args.pytorch_only:
+        if args.provider is None:
+            print("OpenVINOExecutionProvider is enabled with CPU and FP32 by default.")
+            if args.backend or args.precision:
+                raise ValueError("Provider not specified!! Please specify provider arg along with backend and precision.")
+        elif args.provider == "openvino":
+            if args.backend and args.precision:
+                if args.backend not in list(ov_backend_precisions.keys()):
+                    raise ValueError(
+                        "Invalid backend. Valid values are: {}".format(
+                            list(ov_backend_precisions.keys())))
+                if args.precision not in ov_backend_precisions[args.backend]:
+                    raise ValueError("Invalid precision for provided backend. Valid values are: {}".format(
+                        list(ov_backend_precisions[args.backend])))
+            elif args.backend or args.precision:
+                raise ValueError(
+                    "Please specify both backend and precision to override default options.\n"
+                )
+            else:
+                print("OpenVINOExecutionProvider is enabled with CPU and FP32 by default.")
+        else:
+            raise ValueError("Invalid execution provider!! Available providers are: {}".format(inference_execution_providers))
+    else:
+        print("ONNXRuntime inference is disabled.")
+        if args.provider or args.precision or args.backend:
+            raise ValueError("provider, backend, precision arguments are not applicable for --pytorch-only option.")
+
+    # 2. Read input image file and preprocess
+    if not args.input_file:
+        raise ValueError("Path to input image not provided!")
+    if not os.path.exists(args.input_file):
+        raise ValueError("Invalid input file path.")
+    img = Image.open(args.input_file)
+    img_trans = preprocess(img)
+    # Adding batch dimension (size 1)
+    img_trans = torch.unsqueeze(img_trans, 0)
+
+    # 3. Download and load the model
+    model = models.resnet50(pretrained=True)
+    if not args.pytorch_only:
+        if args.provider == "openvino" and (args.backend and args.precision):
+            provider_options = OpenVINOProviderOptions(
+                backend=args.backend, precision=args.precision
+            )
+            model = ORTInferenceModule(model, provider_options=provider_options)
+        else:
+            model = ORTInferenceModule(model)
+
+    # Convert model for evaluation
+    model.eval()
+
+    # 4. Download ImageNet labels
+    categories = download_labels(args.labels)
+
+    # 5. Infer
+    infer(model, img_trans, categories)
+    img.close()
+
+if __name__ == "__main__":
+    main()
diff --git a/torch_ort_inference/torch_ort/ortinferencemodule/ortinferencemodule.py b/torch_ort_inference/torch_ort/ortinferencemodule/ortinferencemodule.py
index 3d0fd066..dbfa6543 100644
--- a/torch_ort_inference/torch_ort/ortinferencemodule/ortinferencemodule.py
+++ b/torch_ort_inference/torch_ort/ortinferencemodule/ortinferencemodule.py
@@ -106,8 +106,7 @@ def _forward_call(self, *inputs, **kwargs):
 
         # Use IO binding
         onnx_input_names = [inp.name for inp in self._onnx_models.exported_model.graph.input]
-        input_info = _io.parse_inputs_for_onnx_export(self._module_parameters, None, schema, inputs, kwargs)
-        inputs = _utils_infer.get_user_inputs(onnx_input_names, input_info, inputs, kwargs, self._device)
+        inputs = _utils_infer.get_user_inputs(onnx_input_names, self._flattened_module._input_info, inputs, kwargs, self._device)
         io_binding = self._inference_session.io_binding()
         _utils._create_iobinding(io_binding, inputs, self._onnx_models.exported_model, self._device)
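
For quick reference, the pieces introduced by this patch compose as in the sketch below. This is a minimal illustration based on the usage.md and resnet_image_classification.py changes above, not part of the patch itself; the ResNet-50 model, the CPU/FP32 provider options, and the dummy 1x3x224x224 input are illustrative assumptions.

```python
# Minimal sketch (not part of the patch): wrap a PyTorch model with ORTInferenceModule
# as documented in torch_ort_inference/docs/usage.md, then run a forward pass.
import torch
import torchvision.models as models
from torch_ort import ORTInferenceModule, OpenVINOProviderOptions

# Backend/precision must be one of the supported pairs listed in usage.md
# (CPU/FP32, GPU/FP32, GPU/FP16, MYRIAD/FP16); CPU/FP32 is assumed here.
provider_options = OpenVINOProviderOptions(backend="CPU", precision="FP32")
model = ORTInferenceModule(models.resnet50(pretrained=True), provider_options=provider_options)
model.eval()

# Dummy ImageNet-sized batch; 1x3x224x224 is an assumption matching the
# ResNet-50 preprocessing used in the test script.
dummy_input = torch.randn(1, 3, 224, 224)
with torch.no_grad():
    outputs = model(dummy_input)

# Convert unnormalized scores to probabilities and show the top 5 class indices.
probabilities = torch.nn.functional.softmax(outputs[0], dim=0)
print(probabilities.topk(5))
```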