diff --git a/Readme.md b/Readme.md
index b901525a..5817218f 100644
--- a/Readme.md
+++ b/Readme.md
@@ -147,32 +147,42 @@ To see torch-ort in action, see https://github.com/microsoft/onnxruntime-trainin
 # Accelerate inference for PyTorch models with ONNX Runtime (Preview)
 
-ONNX Runtime for PyTorch accelerates PyTorch model inference using ONNX Runtime.
+ONNX Runtime for PyTorch is now extended to support PyTorch model inference using ONNX Runtime.
 
-It is available via the torch-ort-inference python package. This preview package enables OpenVINO™ Execution Provider for ONNX Runtime by default for accelerating inference on various Intel CPUs and integrated GPUs.
+It is available via the torch-ort-inference python package. This preview package enables OpenVINO™ Execution Provider for ONNX Runtime by default for accelerating inference on various Intel® CPUs, Intel® integrated GPUs, and Intel® Movidius™ Vision Processing Units - referred to as VPU.
 
 This repository contains the source code for the package, as well as instructions for running the package.
 
+## Prerequisites
+
+- Ubuntu 18.04, 20.04
+
+- Python* 3.7, 3.8 or 3.9
+
 ## Install in a local Python environment
 
 By default, torch-ort-inference depends on PyTorch 1.12 and ONNX Runtime OpenVINO EP 1.12.
 
-Install torch-ort-inference with OpenVINO dependencies
+1. Install torch-ort-inference with OpenVINO dependencies.
 
-- `pip install torch-ort-inference[openvino]`
+   - `pip install torch-ort-inference[openvino]`
+
+2. Run post-installation script
+
+   - `python -m torch_ort.configure`
 
 ## Verify your installation
 
-Once you have created your environment, using Python, execute the following steps to validate that your installation is correct.
+Once you have created your environment, execute the following steps to validate that your installation is correct.
 
-1. Download a inference script
+1. Clone this repo
 
-   - `wget https://raw.githubusercontent.com/pytorch/ort/main/torch_ort_inference/tests/bert_for_sequence_classification.py`
+   - `git clone git@github.com:pytorch/ort.git`
 
 2. Install extra dependencies
 
    - `pip install wget pandas transformers`
-
+
 
 3. Run the inference script
 
    - `python ./ort/torch_ort_inference/tests/bert_for_sequence_classification.py`
@@ -204,6 +214,11 @@ If no provider options are specified by user, OpenVINO™ Execution Provider is
 backend = "CPU"
 precision = "FP32"
 ```
+For more details on APIs, see [usage.md](/torch_ort_inference/docs/usage.md).
+
+### Note
+
+Currently, vision models are supported on Intel® VPUs. Support for NLP models may be added in future releases.
 
 ## License
diff --git a/torch_ort_inference/docs/install.md b/torch_ort_inference/docs/install.md
index e8a215f9..c87601c3 100644
--- a/torch_ort_inference/docs/install.md
+++ b/torch_ort_inference/docs/install.md
@@ -2,6 +2,12 @@
 
 You can install and run torch-ort-inference in your local environment.
 
+## Prerequisites
+
+- Ubuntu 18.04, 20.04
+
+- Python* 3.7, 3.8 or 3.9
+
 ## Run in a Python environment
 
 ### Default dependencies
diff --git a/torch_ort_inference/docs/usage.md b/torch_ort_inference/docs/usage.md
new file mode 100644
index 00000000..2dbe15ab
--- /dev/null
+++ b/torch_ort_inference/docs/usage.md
@@ -0,0 +1,42 @@
+# APIs for OpenVINO™ integration with TorchORT
+
+This document describes the available Python APIs for OpenVINO™ integration with TorchORT to accelerate inference for PyTorch models on various Intel hardware.
+
+## Essential APIs
+
+To add the OpenVINO™ integration with TorchORT package to your PyTorch application, add the following two lines of code:
+
+```python
+from torch_ort import ORTInferenceModule
+model = ORTInferenceModule(model)
+```
+
+By default, the CPU backend with FP32 precision is enabled. You can set a different backend and precision using OpenVINOProviderOptions, as shown below:
+
+```python
+provider_options = OpenVINOProviderOptions(backend = "GPU", precision = "FP16")
+model = ORTInferenceModule(model, provider_options = provider_options)
+```
+Supported backend-precision combinations:
+| Backend | Precision |
+| --------| --------- |
+| CPU | FP32 |
+| GPU | FP32 |
+| GPU | FP16 |
+| MYRIAD | FP16 |
+
+## Additional APIs
+
+To save the inline exported ONNX model, use DebugOptions as shown below:
+
+```python
+debug_options = DebugOptions(save_onnx=True, onnx_prefix='')
+model = ORTInferenceModule(model, debug_options=debug_options)
+```
+
+To enable verbose logging of the execution of the TorchORT pipeline, use DebugOptions as shown below:
+
+```python
+debug_options = DebugOptions(log_level=LogLevel.VERBOSE)
+model = ORTInferenceModule(model, debug_options=debug_options)
+```
diff --git a/torch_ort_inference/tests/bert_for_sequence_classification.py b/torch_ort_inference/tests/bert_for_sequence_classification.py
index 6e8203ed..e9f3ce46 100644
--- a/torch_ort_inference/tests/bert_for_sequence_classification.py
+++ b/torch_ort_inference/tests/bert_for_sequence_classification.py
@@ -8,6 +8,7 @@
 import numpy as np
 import time
 import pandas as pd
+import pathlib
 from transformers import AutoTokenizer
 from transformers import AutoModelForSequenceClassification
@@ -16,30 +17,58 @@ from torch_ort import ORTInferenceModule, OpenVINOProviderOptions
 
 ov_backend_precisions = {"CPU": ["FP32"], "GPU": ["FP32", "FP16"]}
-
+inference_execution_providers = ["openvino"]
 
 def preprocess_input(tokenizer, sentences):
     # Tokenization & Input Formatting
     # Config: "do_lower_case": true, "model_max_length": 512
     inputs = []
+    MAX_LEN = 64
+
     for sentence in sentences:
-        tokenized_inputs = tokenizer(
-            sentence,
-            return_tensors="pt",
-            padding='max_length',
-            truncation=True)
-        inputs.append(tokenized_inputs)
+        # `encode` will:
+        #   (1) Tokenize the sentence.
+        #   (2) Prepend the `[CLS]` token to the start.
+        #   (3) Append the `[SEP]` token to the end.
+        #   (4) Map tokens to their IDs.
+        encoded_sent = tokenizer.encode(
+            sentence,                    # Sentence to encode.
+            add_special_tokens = True,   # Add '[CLS]' and '[SEP]'
+        )
+
+        # Pad our input tokens with value 0.
+        if len(encoded_sent) < MAX_LEN:
+            encoded_sent.extend([0]*(MAX_LEN-len(encoded_sent)))
+
+        # Truncate to MAX_LEN
+        if len(encoded_sent) > MAX_LEN:
+            print("WARNING: During preprocessing, number of tokens for the sentence {} "\
+                "exceeded MAX LENGTH {}. This might impact accuracy of the results".format(
+                sentence,
+                MAX_LEN
+            ))
+            encoded_sent = encoded_sent[:MAX_LEN]
+
+        # Create the attention mask.
+        #   - If a token ID is 0, then it's padding, set the mask to 0.
+        #   - If a token ID is > 0, then it's a real token, set the mask to 1.
+        att_mask = [int(token_id > 0) for token_id in encoded_sent]
+
+        # Store the input ids and attention masks for the sentence.
+        inputs.append({'input_ids': torch.unsqueeze(torch.tensor(encoded_sent),0),
+                       'attention_mask': torch.unsqueeze(torch.tensor(att_mask),0)})
 
     return inputs
 
-def infer(model, tokenizer, inputs):
+def infer(model, sentences, inputs):
+    num_sentences = len(sentences)
     total_infer_time = 0
     results = {}
 
     # Run inference
-    for i in range(len(inputs)):
+    for i in range(num_sentences):
         input_ids = (inputs[i])['input_ids']
         attention_masks = (inputs[i])['attention_mask']
         with torch.no_grad():
@@ -47,7 +76,6 @@ def infer(model, tokenizer, inputs):
             if i == 0:
                 t0 = time.time()
                 model(input_ids, attention_masks)
-                print("warm up time:", time.time()-t0)
             # infer
             t0 = time.time()
             outputs = model(input_ids, attention_masks)
@@ -63,18 +91,21 @@ def infer(model, tokenizer, inputs):
 
         # predictions
         pred_flat = np.argmax(logits, axis=1).flatten()
-        orig_sent = tokenizer.decode(input_ids[0],skip_special_tokens=True)
+        orig_sent = sentences[i]
         results[orig_sent] = pred_flat[0]
 
-    print("\n Top (20) Results: \n")
+    print("\n Number of sentences: {}".format(num_sentences))
+    if num_sentences > 20:
+        print(" First 20 results:")
+    print("\t Grammar correctness label (0=unacceptable, 1=acceptable)\n")
     count = 0
     for k, v in results.items():
         print("\t{!r} : {!r}".format(k, v))
         if count == 20:
             break
         count = count + 1
-    print("\nInference time: {:.4f}s".format(total_infer_time))
-
+    print("\n Average inference time: {:.4f}ms".format((total_infer_time/num_sentences)*1000))
+    print(" Total Inference time: {:.4f}ms".format(total_infer_time * 1000))
 
 def main():
     # 1. Basic setup
@@ -85,7 +116,7 @@ def main():
         "--pytorch-only",
         action="store_true",
         default=False,
-        help="disables ONNX Runtime",
+        help="disables ONNX Runtime inference",
     )
     parser.add_argument(
         "--input",
@@ -119,25 +150,59 @@ def main():
     if not args.pytorch_only:
         if args.provider is None:
             print("OpenVINOExecutionProvider is enabled with CPU and FP32 by default.")
+            if args.backend or args.precision:
+                raise ValueError("Provider not specified!! Please specify provider arg along with backend and precision.")
         elif args.provider == "openvino":
             if args.backend and args.precision:
                 if args.backend not in list(ov_backend_precisions.keys()):
-                    raise Exception(
-                        "Invalid backend. Valid values are:",
-                        list(ov_backend_precisions.keys()),
-                    )
+                    raise ValueError(
+                        "Invalid backend. Valid values are: {}".format(
+                            list(ov_backend_precisions.keys())))
                 if args.precision not in ov_backend_precisions[args.backend]:
-                    raise Exception("Invalid precision for provided backend. Valid values are:",
-                        list(ov_backend_precisions[args.backend]))
-            else:
-                print(
-                    "OpenVINOExecutionProvider is enabled with CPU and FP32 by default."
-                    + " Please specify both backend and precision to override.\n"
+                    raise ValueError("Invalid precision for provided backend. Valid values are: {}".format(
+                        list(ov_backend_precisions[args.backend])))
+            elif args.backend or args.precision:
+                raise ValueError(
+                    "Please specify both backend and precision to override default options.\n"
                 )
+            else:
+                print("OpenVINOExecutionProvider is enabled with CPU and FP32 by default.")
         else:
-            raise Exception("Invalid execution provider!!")
+            raise ValueError("Invalid execution provider!! Available providers are: {}".format(inference_execution_providers))
+    else:
+        print("ONNXRuntime inference is disabled.")
+        if args.provider or args.precision or args.backend:
+            raise ValueError("provider, backend, precision arguments are not applicable for --pytorch-only option.")
+
+    # 2. Read input sentence(s)
+    # Input can be a single sentence, or a list of sentences in a .tsv file.
+    if args.input and args.input_file:
+        raise ValueError("Please provide either input or input file for inference.")
 
-    # 2. Load Model
+    if args.input is not None:
+        sentences = [args.input]
+    elif args.input_file is not None:
+        file_name = args.input_file
+        if not os.path.exists(file_name):
+            raise ValueError("Invalid input file path: %s" % file_name)
+        if os.stat(file_name).st_size == 0:
+            raise ValueError("Input file is empty!!")
+        name, ext = os.path.splitext(file_name)
+        if ext != ".tsv":
+            raise ValueError("Invalid input file format. Please provide .tsv file.")
+        df = pd.read_csv(
+            file_name,
+            delimiter="\t",
+            header=None,
+            names=["Id", "Sentence"],
+            skiprows=1,
+        )
+        sentences = df.Sentence.values
+    else:
+        print("Input not provided! Using default input...")
+        sentences = ["This is a BERT sample.","User input is valid not."]
+
+    # 3. Load Model
     # Pretrained model fine-tuned on CoLA dataset from huggingface model hub to predict grammar correctness
     model = AutoModelForSequenceClassification.from_pretrained(
         "textattack/bert-base-uncased-CoLA"
@@ -155,31 +220,12 @@ def main():
     # Convert model for evaluation
     model.eval()
 
-    # 3. Read input sentence(s)
-    # Input can be a single sentence, list of single sentences in a .tsv file.
-    if args.input is not None:
-        sentences = [args.input]
-    elif args.input_file is not None:
-        if not os.path.exists(args.input_file):
-            raise ValueError("Invalid input file path: %s" % args.input_file)
-        df = pd.read_csv(
-            args.input_file,
-            delimiter="\t",
-            header=None,
-            names=["Id", "Sentence"],
-            skiprows=1,
-        )
-        sentences = df.Sentence.values
-    else:
-        print("Input not provided! Using default input...")
-        sentences = ["This is a sample input."]
-
     # 4. Load Tokenizer & Preprocess input sentences
     tokenizer = AutoTokenizer.from_pretrained("textattack/bert-base-uncased-CoLA")
     inputs = preprocess_input(tokenizer, sentences)
 
     # 5. Infer
-    infer(model, tokenizer, inputs)
+    infer(model, sentences, inputs)
 
 
 if __name__ == "__main__":
diff --git a/torch_ort_inference/tests/resnet_image_classification.py b/torch_ort_inference/tests/resnet_image_classification.py
index 3b950a24..a6896d6a 100644
--- a/torch_ort_inference/tests/resnet_image_classification.py
+++ b/torch_ort_inference/tests/resnet_image_classification.py
@@ -1,163 +1,167 @@
-# -------------------------------------------------------------------------
-# Copyright (C) 2022 Intel Corporation
-# Licensed under the MIT License
-# --------------------------------------------------------------------------
-
-import os
-import time
-import torch
-import wget
-import argparse
-from PIL import Image
-from torchvision import transforms
-import torchvision.models as models
-from torch_ort import (
-    ORTInferenceModule,
-    OpenVINOProviderOptions,
-)
-
-ov_backend_precisions = {"CPU": ["FP32"], "GPU": ["FP32", "FP16"], "MYRIAD": ["FP16"]}
-
-def download_labels(labels):
-    if not labels:
-        labels = "imagenet_classes.txt"
-        if not os.path.exists(labels):
-            labelsUrl = (
-                "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt"
-            )
-            # Download the file (if we haven't already)
-            wget.download(labelsUrl)
-        else:
-            print("\nReusing downloaded imagenet labels")
-
-    # Read the categories
-    with open(labels, "r") as f:
-        categories = [s.strip() for s in f.readlines()]
-    return categories
-
-
-def preprocess(img):
-    transform = transforms.Compose(
-        [
-            transforms.Resize(256),
-            transforms.CenterCrop(224),
-            transforms.ToTensor(),
-            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
-        ]
-    )
-    return transform(img)
-
-
-def infer(model, image, categories):
-    # warmup
-    model(image)
-
-    # Start inference
-    t0 = time.time()
-    outputs = model(image)
-    t1 = time.time() - t0
-    print("\nInference time: {:.4f}ms\n".format(t1 * 1000))
-
-    # The output has unnormalized scores. Run a softmax on it for probabilities.
-    probabilities = torch.nn.functional.softmax(outputs[0], dim=0)
-
-    # Show top categories per image
-    top5_prob, top5_catid = torch.topk(probabilities, 5)
-    print("Top 5 Results: \nLabels , Probabilities:")
-    for i in range(top5_prob.size(0)):
-        print(categories[top5_catid[i]], top5_prob[i].item())
-
-
-def main():
-    # 1. Basic setup
-    parser = argparse.ArgumentParser(description="PyTorch Image Classification Example")
-
-    parser.add_argument(
-        "--pytorch-only",
-        action="store_true",
-        default=False,
-        help="disables ONNX Runtime",
-    )
-    parser.add_argument(
-        "--labels",
-        type=str,
-        help="path to labels file")
-    parser.add_argument(
-        "--input-file",
-        type=str,
-        required=True,
-        help="path to input image file"
-    )
-    parser.add_argument(
-        "--provider",
-        type=str,
-        help="ONNX Runtime Execution Provider",
-    )
-    parser.add_argument(
-        "--backend",
-        type=str,
-        help="OpenVINO target device (CPU, GPU or MYRIAD)"
-    )
-    parser.add_argument(
-        "--precision",
-        type=str,
-        help="OpenVINO target device precision (FP16 or FP32)"
-    )
-
-    args = parser.parse_args()
-
-    # parameters validation
-    if not args.pytorch_only:
-        if args.provider is None:
-            print("OpenVINOExecutionProvider is enabled with CPU and FP32 by default.")
-        elif args.provider == "openvino":
-            if args.backend and args.precision:
-                if args.backend not in list(ov_backend_precisions.keys()):
-                    raise Exception(
-                        "Invalid backend. Valid values are:",
-                        list(ov_backend_precisions.keys()),
-                    )
-                if args.precision not in ov_backend_precisions[args.backend]:
-                    raise Exception("Invalid precision for provided backend. Valid values are:",
-                        list(ov_backend_precisions[args.backend]))
-            else:
-                print(
-                    "OpenVINOExecutionProvider is enabled with CPU and FP32 by default."
-                    + " Please specify both backend and precision to override.\n"
-                )
-        else:
-            raise Exception("Invalid execution provider!!")
-
-    # 2. Download and load the model
-    model = models.resnet50(pretrained=True)
-    if not args.pytorch_only:
-        if args.provider == "openvino" and (args.backend and args.precision):
-            provider_options = OpenVINOProviderOptions(
-                backend=args.backend, precision=args.precision
-            )
-            model = ORTInferenceModule(model, provider_options=provider_options)
-        else:
-            model = ORTInferenceModule(model)
-
-    # Convert model for evaluation
-    model.eval()
-
-    # 3. Download ImageNet labels
-    categories = download_labels(args.labels)
-
-    # 4. Read input image file and preprocess
-    if not args.input_file:
-        raise ValueError("Path to input image not provided!")
-    if not os.path.exists(args.input_file):
-        raise ValueError("Invalid input file path")
-    img = Image.open(args.input_file)
-    img_trans = preprocess(img)
-    # Adding batch dimension (size 1)
-    img_trans = torch.unsqueeze(img_trans, 0)
-
-    # 5. Infer
-    infer(model, img_trans, categories)
-
-
-if __name__ == "__main__":
-    main()
+# -------------------------------------------------------------------------
+# Copyright (C) 2022 Intel Corporation
+# Licensed under the MIT License
+# --------------------------------------------------------------------------
+
+import os
+import time
+import torch
+import wget
+import argparse
+from PIL import Image
+from torchvision import transforms
+import torchvision.models as models
+from torch_ort import ORTInferenceModule, OpenVINOProviderOptions
+
+ov_backend_precisions = {"CPU": ["FP32"], "GPU": ["FP32", "FP16"], "MYRIAD": ["FP16"]}
+inference_execution_providers = ["openvino"]
+
+def download_labels(labels):
+    if not labels:
+        labels = "imagenet_classes.txt"
+        if not os.path.exists(labels):
+            labelsUrl = (
+                "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt"
+            )
+            # Download the file (if we haven't already)
+            wget.download(labelsUrl)
+        else:
+            print("\nReusing downloaded imagenet labels")
+
+    # Read the categories
+    with open(labels, "r") as f:
+        categories = [s.strip() for s in f.readlines()]
+    return categories
+
+
+def preprocess(img):
+    transform = transforms.Compose(
+        [
+            transforms.Resize(256),
+            transforms.CenterCrop(224),
+            transforms.ToTensor(),
+            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+        ]
+    )
+    return transform(img)
+
+
+def infer(model, image, categories):
+    # warmup
+    model(image)
+
+    # Start inference
+    t0 = time.time()
+    outputs = model(image)
+    t1 = time.time() - t0
+    print("\nInference time: {:.4f}ms\n".format(t1 * 1000))
+
+    # The output has unnormalized scores. Run a softmax on it for probabilities.
+    probabilities = torch.nn.functional.softmax(outputs[0], dim=0)
+
+    # Show top categories per image
+    top5_prob, top5_catid = torch.topk(probabilities, 5)
+    print("Top 5 Results: \nLabels , Probabilities:")
+    for i in range(top5_prob.size(0)):
+        print(categories[top5_catid[i]], top5_prob[i].item())
+
+
+def main():
+    # 1. Basic setup
+    parser = argparse.ArgumentParser(description="PyTorch Image Classification Example")
+
+    parser.add_argument(
+        "--pytorch-only",
+        action="store_true",
+        default=False,
+        help="disables ONNX Runtime inference",
+    )
+    parser.add_argument(
+        "--labels",
+        type=str,
+        help="path to labels file")
+    parser.add_argument(
+        "--input-file",
+        type=str,
+        required=True,
+        help="path to input image file"
+    )
+    parser.add_argument(
+        "--provider",
+        type=str,
+        help="ONNX Runtime Execution Provider",
+    )
+    parser.add_argument(
+        "--backend",
+        type=str,
+        help="OpenVINO target device (CPU, GPU or MYRIAD)"
+    )
+    parser.add_argument(
+        "--precision",
+        type=str,
+        help="OpenVINO target device precision (FP16 or FP32)"
+    )
+
+    args = parser.parse_args()
+
+    # parameters validation
+    if not args.pytorch_only:
+        if args.provider is None:
+            print("OpenVINOExecutionProvider is enabled with CPU and FP32 by default.")
+            if args.backend or args.precision:
+                raise ValueError("Provider not specified!! Please specify provider arg along with backend and precision.")
+        elif args.provider == "openvino":
+            if args.backend and args.precision:
+                if args.backend not in list(ov_backend_precisions.keys()):
+                    raise ValueError(
+                        "Invalid backend. Valid values are: {}".format(
+                            list(ov_backend_precisions.keys())))
+                if args.precision not in ov_backend_precisions[args.backend]:
+                    raise ValueError("Invalid precision for provided backend. Valid values are: {}".format(
+                        list(ov_backend_precisions[args.backend])))
+            elif args.backend or args.precision:
+                raise ValueError(
+                    "Please specify both backend and precision to override default options.\n"
+                )
+            else:
+                print("OpenVINOExecutionProvider is enabled with CPU and FP32 by default.")
+        else:
+            raise ValueError("Invalid execution provider!! Available providers are: {}".format(inference_execution_providers))
+    else:
+        print("ONNXRuntime inference is disabled.")
+        if args.provider or args.precision or args.backend:
+            raise ValueError("provider, backend, precision arguments are not applicable for --pytorch-only option.")
+
+    # 2. Read input image file and preprocess
+    if not args.input_file:
+        raise ValueError("Path to input image not provided!")
+    if not os.path.exists(args.input_file):
+        raise ValueError("Invalid input file path.")
+    img = Image.open(args.input_file)
+    img_trans = preprocess(img)
+    # Adding batch dimension (size 1)
+    img_trans = torch.unsqueeze(img_trans, 0)
+
+    # 3. Download and load the model
+    model = models.resnet50(pretrained=True)
+    if not args.pytorch_only:
+        if args.provider == "openvino" and (args.backend and args.precision):
+            provider_options = OpenVINOProviderOptions(
+                backend=args.backend, precision=args.precision
+            )
+            model = ORTInferenceModule(model, provider_options=provider_options)
+        else:
+            model = ORTInferenceModule(model)
+
+    # Convert model for evaluation
+    model.eval()
+
+    # 4. Download ImageNet labels
+    categories = download_labels(args.labels)
+
+    # 5. Infer
+    infer(model, img_trans, categories)
+    img.close()
+
+if __name__ == "__main__":
+    main()
diff --git a/torch_ort_inference/torch_ort/ortinferencemodule/ortinferencemodule.py b/torch_ort_inference/torch_ort/ortinferencemodule/ortinferencemodule.py
index 3d0fd066..dbfa6543 100644
--- a/torch_ort_inference/torch_ort/ortinferencemodule/ortinferencemodule.py
+++ b/torch_ort_inference/torch_ort/ortinferencemodule/ortinferencemodule.py
@@ -106,8 +106,7 @@ def _forward_call(self, *inputs, **kwargs):
 
         # Use IO binding
         onnx_input_names = [inp.name for inp in self._onnx_models.exported_model.graph.input]
-        input_info = _io.parse_inputs_for_onnx_export(self._module_parameters, None, schema, inputs, kwargs)
-        inputs = _utils_infer.get_user_inputs(onnx_input_names, input_info, inputs, kwargs, self._device)
+        inputs = _utils_infer.get_user_inputs(onnx_input_names, self._flattened_module._input_info, inputs, kwargs, self._device)
         io_binding = self._inference_session.io_binding()
         _utils._create_iobinding(io_binding, inputs, self._onnx_models.exported_model, self._device)
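
For quick reference, the pieces introduced by this patch compose as in the sketch below. This is a minimal illustration based on the usage.md and resnet_image_classification.py changes above, not part of the patch itself; the ResNet-50 model, the CPU/FP32 provider options, and the dummy 1x3x224x224 input are illustrative assumptions.

```python
# Minimal sketch (not part of the patch): wrap a PyTorch model with ORTInferenceModule
# as documented in torch_ort_inference/docs/usage.md, then run a forward pass.
import torch
import torchvision.models as models
from torch_ort import ORTInferenceModule, OpenVINOProviderOptions

# Backend/precision must be one of the supported pairs listed in usage.md
# (CPU/FP32, GPU/FP32, GPU/FP16, MYRIAD/FP16); CPU/FP32 is assumed here.
provider_options = OpenVINOProviderOptions(backend="CPU", precision="FP32")
model = ORTInferenceModule(models.resnet50(pretrained=True), provider_options=provider_options)
model.eval()

# Dummy ImageNet-sized batch; 1x3x224x224 is an assumption matching the
# ResNet-50 preprocessing used in the test script.
dummy_input = torch.randn(1, 3, 224, 224)
with torch.no_grad():
    outputs = model(dummy_input)

# Convert unnormalized scores to probabilities and show the top 5 class indices.
probabilities = torch.nn.functional.softmax(outputs[0], dim=0)
print(probabilities.topk(5))
```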