From 67ea747e46dede68a1732b2906b353c7f7bda126 Mon Sep 17 00:00:00 2001 From: Laxmi Ganesan Date: Thu, 16 Jun 2022 12:57:24 -0700 Subject: [PATCH] Changing MAX_LEN to 64 during preprocessing Fixing issues identified during validation More fixes for issues identified during review Changing LF line sequence Adding back license for resnet sample fixing ReadMe instructions removing version file generated from build fix to close image file Update Readme.md Fix avg inf time Minor fixes - modified preprocessing in BERT sample to throw a warning message when truncation happens. - modified ortinferencemodule to reuse _inputs_info Print message change Reuse _input_info Changing exceptions to valueerrors, fixing default options Updating Readme and usage docs removing test labels file Update usage.md --- Readme.md | 33 +- torch_ort_inference/docs/install.md | 6 + torch_ort_inference/docs/usage.md | 42 +++ .../tests/bert_for_sequence_classification.py | 140 +++++--- .../tests/resnet_image_classification.py | 330 +++++++++--------- .../ortinferencemodule/ortinferencemodule.py | 3 +- 6 files changed, 333 insertions(+), 221 deletions(-) create mode 100644 torch_ort_inference/docs/usage.md diff --git a/Readme.md b/Readme.md index b901525a..5817218f 100644 --- a/Readme.md +++ b/Readme.md @@ -147,32 +147,42 @@ To see torch-ort in action, see https://github.com/microsoft/onnxruntime-trainin # Accelerate inference for PyTorch models with ONNX Runtime (Preview) -ONNX Runtime for PyTorch accelerates PyTorch model inference using ONNX Runtime. +ONNX Runtime for PyTorch is now extended to support PyTorch model inference using ONNX Runtime. -It is available via the torch-ort-inference python package. This preview package enables OpenVINO™ Execution Provider for ONNX Runtime by default for accelerating inference on various Intel CPUs and integrated GPUs. +It is available via the torch-ort-inference python package. This preview package enables OpenVINO™ Execution Provider for ONNX Runtime by default for accelerating inference on various Intel® CPUs, Intel® integrated GPUs, and Intel® Movidius™ Vision Processing Units - referred to as VPU. This repository contains the source code for the package, as well as instructions for running the package. +## Prerequisites + +- Ubuntu 18.04, 20.04 + +- Python* 3.7, 3.8 or 3.9 + ## Install in a local Python environment By default, torch-ort-inference depends on PyTorch 1.12 and ONNX Runtime OpenVINO EP 1.12. -Install torch-ort-inference with OpenVINO dependencies +1. Install torch-ort-inference with OpenVINO dependencies. -- `pip install torch-ort-inference[openvino]` + - `pip install torch-ort-inference[openvino]` +
+2. Run post-installation script -## Verify your installation + - `python -m torch_ort.configure` -Once you have created your environment, using Python, execute the following steps to validate that your installation is correct. +## Verify your installation -1. Download a inference script +Once you have created your environment, execute the following steps to validate that your installation is correct. - - `wget https://raw.githubusercontent.com/pytorch/ort/main/torch_ort_inference/tests/bert_for_sequence_classification.py` +1. Clone this repo + - `git clone git@github.com:pytorch/ort.git` +
2. Install extra dependencies - `pip install wget pandas transformers` - +
3. Run the inference script - `python ./ort/torch_ort_inference/tests/bert_for_sequence_classification.py` @@ -204,6 +214,11 @@ If no provider options are specified by user, OpenVINO™ Execution Provider is backend = "CPU" precision = "FP32" ``` +For more details on APIs, see [usage.md](/torch_ort_inference/docs/usage.md). + +### Note + +Currently, Vision models are supported on Intel® VPUs. Support for NLP models may be added in future releases. ## License diff --git a/torch_ort_inference/docs/install.md b/torch_ort_inference/docs/install.md index e8a215f9..c87601c3 100644 --- a/torch_ort_inference/docs/install.md +++ b/torch_ort_inference/docs/install.md @@ -2,6 +2,12 @@ You can install and run torch-ort-inference in your local environment. +## Prerequisites + +- Ubuntu 18.04, 20.04 + +- Python* 3.7, 3.8 or 3.9 + ## Run in a Python environment ### Default dependencies diff --git a/torch_ort_inference/docs/usage.md b/torch_ort_inference/docs/usage.md new file mode 100644 index 00000000..2dbe15ab --- /dev/null +++ b/torch_ort_inference/docs/usage.md @@ -0,0 +1,42 @@ +# APIs for OpenVINO™ integration with TorchORT + +This document describes available Python APIs for OpenVINO™ integration with TorchORT to accelerate inference for PyTorch models on various Intel hardware. + +## Essential APIs + +To add the OpenVINO™ integration with TorchORT package to your PyTorch application, add following 2 lines of code: + +```python +from torch_ort import ORTInferenceModule +model = ORTInferenceModule(model) +``` + +By default, CPU backend with FP32 precision is enabled. You can set different backend and supported precision using OpenVINOProviderOptions as below: + +```python +provider_options = OpenVINOProviderOptions(backend = "GPU", precision = "FP16") +model = ORTInferenceModule(model, provider_options = provider_options) +``` +Supported backend-precision combinations: +| Backend | Precision | +| --------| --------- | +| CPU | FP32 | +| GPU | FP32 | +| GPU | FP16 | +| MYRIAD | FP16 | + +## Additional APIs + +To save the inline exported onnx model, use DebugOptions as below: + +```python +debug_options = DebugOptions(save_onnx=True, onnx_prefix='') +model = ORTInferenceModule(model, debug_options=debug_options) +``` + +To enable verbose log of the execution of the TorchORT pipeline, use DebugOptions as below: + +```python +debug_options = DebugOptions(log_level=LogLevel.VERBOSE) +model = ORTInferenceModule(model, debug_options=debug_options) +``` diff --git a/torch_ort_inference/tests/bert_for_sequence_classification.py b/torch_ort_inference/tests/bert_for_sequence_classification.py index 6e8203ed..e9f3ce46 100644 --- a/torch_ort_inference/tests/bert_for_sequence_classification.py +++ b/torch_ort_inference/tests/bert_for_sequence_classification.py @@ -8,6 +8,7 @@ import numpy as np import time import pandas as pd +import pathlib from transformers import AutoTokenizer from transformers import AutoModelForSequenceClassification @@ -16,30 +17,58 @@ from torch_ort import ORTInferenceModule, OpenVINOProviderOptions ov_backend_precisions = {"CPU": ["FP32"], "GPU": ["FP32", "FP16"]} - +inference_execution_providers = ["openvino"] def preprocess_input(tokenizer, sentences): # Tokenization & Input Formatting # Config: "do_lower_case": true, "model_max_length": 512 inputs = [] + MAX_LEN = 64 + for sentence in sentences: - tokenized_inputs = tokenizer( - sentence, - return_tensors="pt", - padding='max_length', - truncation=True) - inputs.append(tokenized_inputs) + # `encode` will: + # (1) Tokenize the sentence. 
+ # (2) Prepend the `[CLS]` token to the start. + # (3) Append the `[SEP]` token to the end. + # (4) Map tokens to their IDs. + encoded_sent = tokenizer.encode( + sentence, # Sentence to encode. + add_special_tokens = True, # Add '[CLS]' and '[SEP]' + ) + + # Pad our input tokens with value 0. + if len(encoded_sent) < MAX_LEN: + encoded_sent.extend([0]*(MAX_LEN-len(encoded_sent))) + + # Truncate to MAX_LEN + if len(encoded_sent) > MAX_LEN: + print("WARNING: During preprocessing, number of tokens for the sentence {}"\ + "exceedeed MAX LENGTH {}. This might impact accuracy of the results".format( + sentence, + MAX_LEN + )) + encoded_sent = encoded_sent[:MAX_LEN] + + # Create the attention mask. + # - If a token ID is 0, then it's padding, set the mask to 0. + # - If a token ID is > 0, then it's a real token, set the mask to 1. + att_mask = [int(token_id > 0) for token_id in encoded_sent] + + # Store the input ids and attention masks for the sentence. + inputs.append({'input_ids': torch.unsqueeze(torch.tensor(encoded_sent),0), + 'attention_mask': torch.unsqueeze(torch.tensor(att_mask),0)}) return inputs -def infer(model, tokenizer, inputs): +def infer(model, sentences, inputs): + num_sentences = len(sentences) total_infer_time = 0 results = {} # Run inference - for i in range(len(inputs)): + for i in range(num_sentences): input_ids = (inputs[i])['input_ids'] attention_masks = (inputs[i])['attention_mask'] with torch.no_grad(): @@ -47,7 +76,6 @@ def infer(model, tokenizer, inputs): if i == 0: t0 = time.time() model(input_ids, attention_masks) - print("warm up time:", time.time()-t0) # infer t0 = time.time() outputs = model(input_ids, attention_masks) @@ -63,18 +91,21 @@ def infer(model, tokenizer, inputs): # predictions pred_flat = np.argmax(logits, axis=1).flatten() - orig_sent = tokenizer.decode(input_ids[0],skip_special_tokens=True) + orig_sent = sentences[i] results[orig_sent] = pred_flat[0] - print("\n Top (20) Results: \n") + print("\n Number of sentences: {}".format(num_sentences)) + if num_sentences > 20: + print(" First 20 results:") + print("\t Grammar correctness label (0=unacceptable, 1=acceptable)\n") count = 0 for k, v in results.items(): print("\t{!r} : {!r}".format(k, v)) if count == 20: break count = count + 1 - print("\nInference time: {:.4f}s".format(total_infer_time)) - + print("\n Average inference time: {:.4f}ms".format((total_infer_time/num_sentences)*1000)) + print(" Total Inference time: {:.4f}ms".format(total_infer_time * 1000)) def main(): # 1. Basic setup @@ -85,7 +116,7 @@ def main(): "--pytorch-only", action="store_true", default=False, - help="disables ONNX Runtime", + help="disables ONNX Runtime inference", ) parser.add_argument( "--input", @@ -119,25 +150,59 @@ def main(): if not args.pytorch_only: if args.provider is None: print("OpenVINOExecutionProvider is enabled with CPU and FP32 by default.") + if args.backend or args.precision: + raise ValueError("Provider not specified!! Please specify provider arg along with backend and precision.") elif args.provider == "openvino": if args.backend and args.precision: if args.backend not in list(ov_backend_precisions.keys()): - raise Exception( - "Invalid backend. Valid values are:", - list(ov_backend_precisions.keys()), - ) + raise ValueError( + "Invalid backend. Valid values are: {}".format( + list(ov_backend_precisions.keys()))) if args.precision not in ov_backend_precisions[args.backend]: - raise Exception("Invalid precision for provided backend. 
Valid values are:", - list(ov_backend_precisions[args.backend])) - else: - print( - "OpenVINOExecutionProvider is enabled with CPU and FP32 by default." - + " Please specify both backend and precision to override.\n" + raise ValueError("Invalid precision for provided backend. Valid values are: {}".format( + list(ov_backend_precisions[args.backend]))) + elif args.backend or args.precision: + raise ValueError( + "Please specify both backend and precision to override default options.\n" ) + else: + print("OpenVINOExecutionProvider is enabled with CPU and FP32 by default.") else: - raise Exception("Invalid execution provider!!") + raise ValueError("Invalid execution provider!! Available providers are: {}".format(inference_execution_providers)) + else: + print("ONNXRuntime inference is disabled.") + if args.provider or args.precision or args.backend: + raise ValueError("provider, backend, precision arguments are not applicable for --pytorch-only option.") + + # 2. Read input sentence(s) + # Input can be a single sentence, list of single sentences in a .tsv file. + if args.input and args.input_file: + raise ValueError("Please provide either input or input file for inference.") - # 2. Load Model + if args.input is not None: + sentences = [args.input] + elif args.input_file is not None: + file_name = args.input_file + if not os.path.exists(file_name): + raise ValueError("Invalid input file path: %s" % file_name) + if os.stat(file_name).st_size == 0: + raise ValueError("Input file is empty!!") + name, ext = os.path.splitext(file_name) + if ext != ".tsv": + raise ValueError("Invalid input file format. Please provide .tsv file.") + df = pd.read_csv( + file_name, + delimiter="\t", + header=None, + names=["Id", "Sentence"], + skiprows=1, + ) + sentences = df.Sentence.values + else: + print("Input not provided! Using default input...") + sentences = ["This is a BERT sample.","User input is valid not."] + + # 3. Load Model # Pretrained model fine-tuned on CoLA dataset from huggingface model hub to predict grammar correctness model = AutoModelForSequenceClassification.from_pretrained( "textattack/bert-base-uncased-CoLA" @@ -155,31 +220,12 @@ def main(): # Convert model for evaluation model.eval() - # 3. Read input sentence(s) - # Input can be a single sentence, list of single sentences in a .tsv file. - if args.input is not None: - sentences = [args.input] - elif args.input_file is not None: - if not os.path.exists(args.input_file): - raise ValueError("Invalid input file path: %s" % args.input_file) - df = pd.read_csv( - args.input_file, - delimiter="\t", - header=None, - names=["Id", "Sentence"], - skiprows=1, - ) - sentences = df.Sentence.values - else: - print("Input not provided! Using default input...") - sentences = ["This is a sample input."] - # 4. Load Tokenizer & Preprocess input sentences tokenizer = AutoTokenizer.from_pretrained("textattack/bert-base-uncased-CoLA") inputs = preprocess_input(tokenizer, sentences) # 5. 
Infer - infer(model, tokenizer, inputs) + infer(model, sentences, inputs) if __name__ == "__main__": diff --git a/torch_ort_inference/tests/resnet_image_classification.py b/torch_ort_inference/tests/resnet_image_classification.py index 3b950a24..a6896d6a 100644 --- a/torch_ort_inference/tests/resnet_image_classification.py +++ b/torch_ort_inference/tests/resnet_image_classification.py @@ -1,163 +1,167 @@ -# ------------------------------------------------------------------------- -# Copyright (C) 2022 Intel Corporation -# Licensed under the MIT License -# -------------------------------------------------------------------------- - -import os -import time -import torch -import wget -import argparse -from PIL import Image -from torchvision import transforms -import torchvision.models as models -from torch_ort import ( - ORTInferenceModule, - OpenVINOProviderOptions, -) - -ov_backend_precisions = {"CPU": ["FP32"], "GPU": ["FP32", "FP16"], "MYRIAD": ["FP16"]} - -def download_labels(labels): - if not labels: - labels = "imagenet_classes.txt" - if not os.path.exists(labels): - labelsUrl = ( - "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt" - ) - # Download the file (if we haven't already) - wget.download(labelsUrl) - else: - print("\nReusing downloaded imagenet labels") - - # Read the categories - with open(labels, "r") as f: - categories = [s.strip() for s in f.readlines()] - return categories - - -def preprocess(img): - transform = transforms.Compose( - [ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ] - ) - return transform(img) - - -def infer(model, image, categories): - # warmup - model(image) - - # Start inference - t0 = time.time() - outputs = model(image) - t1 = time.time() - t0 - print("\nInference time: {:.4f}ms\n".format(t1 * 1000)) - - # The output has unnormalized scores. Run a softmax on it for probabilities. - probabilities = torch.nn.functional.softmax(outputs[0], dim=0) - - # Show top categories per image - top5_prob, top5_catid = torch.topk(probabilities, 5) - print("Top 5 Results: \nLabels , Probabilities:") - for i in range(top5_prob.size(0)): - print(categories[top5_catid[i]], top5_prob[i].item()) - - -def main(): - # 1. Basic setup - parser = argparse.ArgumentParser(description="PyTorch Image Classification Example") - - parser.add_argument( - "--pytorch-only", - action="store_true", - default=False, - help="disables ONNX Runtime", - ) - parser.add_argument( - "--labels", - type=str, - help="path to labels file") - parser.add_argument( - "--input-file", - type=str, - required=True, - help="path to input image file" - ) - parser.add_argument( - "--provider", - type=str, - help="ONNX Runtime Execution Provider", - ) - parser.add_argument( - "--backend", - type=str, - help="OpenVINO target device (CPU, GPU or MYRIAD)" - ) - parser.add_argument( - "--precision", - type=str, - help="OpenVINO target device precision (FP16 or FP32)" - ) - - args = parser.parse_args() - - # parameters validation - if not args.pytorch_only: - if args.provider is None: - print("OpenVINOExecutionProvider is enabled with CPU and FP32 by default.") - elif args.provider == "openvino": - if args.backend and args.precision: - if args.backend not in list(ov_backend_precisions.keys()): - raise Exception( - "Invalid backend. 
Valid values are:", - list(ov_backend_precisions.keys()), - ) - if args.precision not in ov_backend_precisions[args.backend]: - raise Exception("Invalid precision for provided backend. Valid values are:", - list(ov_backend_precisions[args.backend])) - else: - print( - "OpenVINOExecutionProvider is enabled with CPU and FP32 by default." - + " Please specify both backend and precision to override.\n" - ) - else: - raise Exception("Invalid execution provider!!") - - # 2. Download and load the model - model = models.resnet50(pretrained=True) - if not args.pytorch_only: - if args.provider == "openvino" and (args.backend and args.precision): - provider_options = OpenVINOProviderOptions( - backend=args.backend, precision=args.precision - ) - model = ORTInferenceModule(model, provider_options=provider_options) - else: - model = ORTInferenceModule(model) - - # Convert model for evaluation - model.eval() - - # 3. Download ImageNet labels - categories = download_labels(args.labels) - - # 4. Read input image file and preprocess - if not args.input_file: - raise ValueError("Path to input image not provided!") - if not os.path.exists(args.input_file): - raise ValueError("Invalid input file path") - img = Image.open(args.input_file) - img_trans = preprocess(img) - # Adding batch dimension (size 1) - img_trans = torch.unsqueeze(img_trans, 0) - - # 5. Infer - infer(model, img_trans, categories) - - -if __name__ == "__main__": - main() +# ------------------------------------------------------------------------- +# Copyright (C) 2022 Intel Corporation +# Licensed under the MIT License +# -------------------------------------------------------------------------- + +import os +import time +import torch +import wget +import argparse +from PIL import Image +from torchvision import transforms +import torchvision.models as models +from torch_ort import ORTInferenceModule, OpenVINOProviderOptions + +ov_backend_precisions = {"CPU": ["FP32"], "GPU": ["FP32", "FP16"], "MYRIAD": ["FP16"]} +inference_execution_providers = ["openvino"] + +def download_labels(labels): + if not labels: + labels = "imagenet_classes.txt" + if not os.path.exists(labels): + labelsUrl = ( + "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt" + ) + # Download the file (if we haven't already) + wget.download(labelsUrl) + else: + print("\nReusing downloaded imagenet labels") + + # Read the categories + with open(labels, "r") as f: + categories = [s.strip() for s in f.readlines()] + return categories + + +def preprocess(img): + transform = transforms.Compose( + [ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ] + ) + return transform(img) + + +def infer(model, image, categories): + # warmup + model(image) + + # Start inference + t0 = time.time() + outputs = model(image) + t1 = time.time() - t0 + print("\nInference time: {:.4f}ms\n".format(t1 * 1000)) + + # The output has unnormalized scores. Run a softmax on it for probabilities. + probabilities = torch.nn.functional.softmax(outputs[0], dim=0) + + # Show top categories per image + top5_prob, top5_catid = torch.topk(probabilities, 5) + print("Top 5 Results: \nLabels , Probabilities:") + for i in range(top5_prob.size(0)): + print(categories[top5_catid[i]], top5_prob[i].item()) + + +def main(): + # 1. 
Basic setup + parser = argparse.ArgumentParser(description="PyTorch Image Classification Example") + + parser.add_argument( + "--pytorch-only", + action="store_true", + default=False, + help="disables ONNX Runtime inference", + ) + parser.add_argument( + "--labels", + type=str, + help="path to labels file") + parser.add_argument( + "--input-file", + type=str, + required=True, + help="path to input image file" + ) + parser.add_argument( + "--provider", + type=str, + help="ONNX Runtime Execution Provider", + ) + parser.add_argument( + "--backend", + type=str, + help="OpenVINO target device (CPU, GPU or MYRIAD)" + ) + parser.add_argument( + "--precision", + type=str, + help="OpenVINO target device precision (FP16 or FP32)" + ) + + args = parser.parse_args() + + # parameters validation + if not args.pytorch_only: + if args.provider is None: + print("OpenVINOExecutionProvider is enabled with CPU and FP32 by default.") + if args.backend or args.precision: + raise ValueError("Provider not specified!! Please specify provider arg along with backend and precision.") + elif args.provider == "openvino": + if args.backend and args.precision: + if args.backend not in list(ov_backend_precisions.keys()): + raise ValueError( + "Invalid backend. Valid values are: {}".format( + list(ov_backend_precisions.keys()))) + if args.precision not in ov_backend_precisions[args.backend]: + raise ValueError("Invalid precision for provided backend. Valid values are: {}".format( + list(ov_backend_precisions[args.backend]))) + elif args.backend or args.precision: + raise ValueError( + "Please specify both backend and precision to override default options.\n" + ) + else: + print("OpenVINOExecutionProvider is enabled with CPU and FP32 by default.") + else: + raise ValueError("Invalid execution provider!! Available providers are: {}".format(inference_execution_providers)) + else: + print("ONNXRuntime inference is disabled.") + if args.provider or args.precision or args.backend: + raise ValueError("provider, backend, precision arguments are not applicable for --pytorch-only option.") + + # 2. Read input image file and preprocess + if not args.input_file: + raise ValueError("Path to input image not provided!") + if not os.path.exists(args.input_file): + raise ValueError("Invalid input file path.") + img = Image.open(args.input_file) + img_trans = preprocess(img) + # Adding batch dimension (size 1) + img_trans = torch.unsqueeze(img_trans, 0) + + # 3. Download and load the model + model = models.resnet50(pretrained=True) + if not args.pytorch_only: + if args.provider == "openvino" and (args.backend and args.precision): + provider_options = OpenVINOProviderOptions( + backend=args.backend, precision=args.precision + ) + model = ORTInferenceModule(model, provider_options=provider_options) + else: + model = ORTInferenceModule(model) + + # Convert model for evaluation + model.eval() + + # 4. Download ImageNet labels + categories = download_labels(args.labels) + + # 5. 
Infer + infer(model, img_trans, categories) + img.close() + +if __name__ == "__main__": + main() diff --git a/torch_ort_inference/torch_ort/ortinferencemodule/ortinferencemodule.py b/torch_ort_inference/torch_ort/ortinferencemodule/ortinferencemodule.py index 3d0fd066..dbfa6543 100644 --- a/torch_ort_inference/torch_ort/ortinferencemodule/ortinferencemodule.py +++ b/torch_ort_inference/torch_ort/ortinferencemodule/ortinferencemodule.py @@ -106,8 +106,7 @@ def _forward_call(self, *inputs, **kwargs): # Use IO binding onnx_input_names = [inp.name for inp in self._onnx_models.exported_model.graph.input] - input_info = _io.parse_inputs_for_onnx_export(self._module_parameters, None, schema, inputs, kwargs) - inputs = _utils_infer.get_user_inputs(onnx_input_names, input_info, inputs, kwargs, self._device) + inputs = _utils_infer.get_user_inputs(onnx_input_names, self._flattened_module._input_info, inputs, kwargs, self._device) io_binding = self._inference_session.io_binding() _utils._create_iobinding(io_binding, inputs, self._onnx_models.exported_model, self._device)
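For quick reference, both updated samples follow the same wrapping pattern that the new `usage.md` documents: build the PyTorch model, wrap it in `ORTInferenceModule` (optionally with `OpenVINOProviderOptions`), call `eval()`, and run inference. The sketch below condenses that flow outside the diff. It is illustrative only: it assumes `torch-ort-inference` and `torchvision` are installed, uses only class names that appear in this patch, and the backend/precision values are example choices rather than requirements.

```python
# Minimal sketch of the inference flow exercised by the updated samples.
# Assumes torch-ort-inference and torchvision are installed; the dummy input
# and the CPU/FP32 provider options are illustrative choices, not requirements.
import torch
import torchvision.models as models
from torch_ort import ORTInferenceModule, OpenVINOProviderOptions

model = models.resnet50(pretrained=True)

# Default is the OpenVINO EP with CPU backend and FP32 precision; to override,
# both backend and precision must be given (mirroring the samples' validation).
provider_options = OpenVINOProviderOptions(backend="CPU", precision="FP32")
model = ORTInferenceModule(model, provider_options=provider_options)
model.eval()

# 1x3x224x224 matches the shape produced by the resnet sample's preprocessing.
dummy_input = torch.randn(1, 3, 224, 224)
with torch.no_grad():
    outputs = model(dummy_input)
probabilities = torch.nn.functional.softmax(outputs[0], dim=0)
print(probabilities.topk(5))
```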