Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions onnxruntime/core/providers/openvino/backend_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -292,12 +292,12 @@ bool BackendManager::ModelHasSymbolicInputDims(const onnxruntime::GraphViewer& s
}

// For dynamic models with incomplete reshape coverage, clear shapes
if (has_symbolic_dims && !all_dynamic_inputs_covered) {
/* if (has_symbolic_dims && !all_dynamic_inputs_covered) {
session_context_.reshape.clear();
LOGS_DEFAULT(WARNING) << "reshape_input does not cover all dynamic dimensions, "
<< "ignoring all provided shapes";
return true; // Model is dynamic
}
}*/

// If shapes are valid with complete coverage for dynamic model, treat as concrete
if (has_symbolic_dims && shapes_valid && all_dynamic_inputs_covered) {
Expand Down Expand Up @@ -476,7 +476,9 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
};

[[maybe_unused]] bool enable_ovep_qdq_optimizer = session_context_.enable_qdq_optimizer && IsQDQGraph(subgraph);
[[maybe_unused]] std::optional<bool> enable_compiler_qdq_optimization = queryOVProperty("NPU_QDQ_OPTIMIZATION", session_context_.device_type);
[[maybe_unused]] std::optional<bool> enable_compiler_qdq_optimization = false;
if (session_context_.device_type.find("NPU") != std::string::npos)
enable_compiler_qdq_optimization = queryOVProperty("NPU_QDQ_OPTIMIZATION", "NPU");
#if (((OPENVINO_VERSION_MAJOR == 2025) && (OPENVINO_VERSION_MINOR > 0)) || (OPENVINO_VERSION_MAJOR > 2025))
if (session_context_.device_type.find("NPU") != std::string::npos && session_context_.enable_qdq_optimizer) {
if (enable_compiler_qdq_optimization.has_value() && enable_compiler_qdq_optimization.value()) {
Expand Down
49 changes: 48 additions & 1 deletion onnxruntime/core/providers/openvino/backend_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,16 @@ bool IsCILogEnabled() {
return false;
}

// Formats a reshape map as a human-readable string for logging,
// e.g. "'input_1': [1,3,224,224], 'data': [1..5,2]".
// Note: ov::PartialShape provides an operator<< overload, so shapes are
// streamed directly.
std::string get_shapes_string(const reshape_t& shapes) {
  std::stringstream ss;
  bool first = true;  // avoids the O(n^2) ss.str().empty() check per iteration
  for (const auto& [tensor_name, shape] : shapes) {
    if (!first)
      ss << ", ";
    first = false;
    ss << "'" << tensor_name << "': " << shape;
  }
  return ss.str();
}

std::shared_ptr<const OVNetwork>
CreateOVModel(std::string&& model,
const SessionContext& session_context,
Expand All @@ -46,17 +56,27 @@ CreateOVModel(std::string&& model,
std::cout << "CreateNgraphFunc" << std::endl;
}
try {
auto ov_model = OVCore::Get()->ReadModel(std::move(model), session_context.onnx_model_path_name.string());
auto ov_model = OVCore::Get()->ReadModel(std::move(model), session_context.onnx_model_path_name.string());

if (!session_context.affinity.empty()) {
LOGS_DEFAULT(INFO) << log_tag << "Setting the ov nodes to specified affinity";
Set_Affinity(ov_model, session_context);
}

if (!session_context.reshape.empty()) {
LOGS_DEFAULT(INFO) << log_tag << "Reshaping the ov tensor to specified shape";
ov_model->reshape(session_context.reshape);
}

ov::preprocess::PrePostProcessor preproc(ov_model);
ov_model = preproc.build();


if (!session_context.layout.empty()) {
LOGS_DEFAULT(INFO) << log_tag << "Setting the ov tensor layout to specified layout";
ov_model = Set_Layout(ov_model, session_context.layout);
}

// Check for Constant Folding
if ((session_context.device_type != "NPU") && !session_context.is_wholly_supported_graph) {
ov::pass::ConstantFolding pass_const_obj;
Expand Down Expand Up @@ -141,6 +161,33 @@ std::shared_ptr<OVNetwork> Set_Layout(std::shared_ptr<OVNetwork> ov_model, const
return preproc.build();
}

void Set_Affinity(std::shared_ptr<OVNetwork> ov_model, const SessionContext& session_context) {

std::string selected_device = "CPU";
if (auto delimit = session_context.device_type.find(":"); delimit != std::string::npos) {
auto device_mode = session_context.device_type.substr(0, delimit);
if (device_mode.find("HETERO") != std::string::npos) {
const auto& devices = session_context.device_type.substr(delimit + 1);
auto delimit_comma = devices.find(",");
selected_device = devices.substr(0, delimit_comma);
} else {
ORT_THROW("[ERROR] [OpenVINO] Invalid device_type is selected. Supported modes is HETERO");
}
} else {
ORT_THROW("[ERROR] [OpenVINO] Invalid device_type is selected. Supported modes is HETERO");
}

for (auto&& ov_node : ov_model->get_ops()) {
auto name = ov_node->get_friendly_name();
auto it = session_context.affinity.find(name);
if (it != session_context.affinity.end()) {
ov_node->get_rt_info()["affinity"] = it->second;
} else {
ov_node->get_rt_info()["affinity"] = selected_device;
}
}
}

int GetFirstAvailableDevice(SessionContext& session_context) {
int i = 0;
// Get the first available VAD-M device and set the device to busy
Expand Down
4 changes: 4 additions & 0 deletions onnxruntime/core/providers/openvino/backend_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,10 @@ void printPerformanceCounts(OVInferRequestPtr request, std::ostream& stream, std

bool IsModelStreamXML(std::istream& model_stream);

void Set_Affinity(std::shared_ptr<OVNetwork> ov_model, const SessionContext& session_context);

std::string get_shapes_string(const reshape_t& shapes);

} // namespace backend_utils
} // namespace openvino_ep
} // namespace onnxruntime
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
!session_context_.so_context_enable &&
session_context_.reshape.empty() &&
session_context_.layout.empty() &&
session_context_.affinity.empty() &&
!enable_causallm &&
!eligible_for_cpu_fallback &&
auto_unified_compile);
Expand Down
13 changes: 8 additions & 5 deletions onnxruntime/core/providers/openvino/backends/basic_backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,14 +82,17 @@ struct OnnxToOvNetworkBindings {
}
}

ORT_ENFORCE(matched_names, log_tag,
"Input names mismatch between OpenVINO and ONNX. ", onnx_name,
" doesn't exist in the list of OpenVINO input tensor names");
//ORT_ENFORCE(matched_names, log_tag,
// "Input names mismatch between OpenVINO and ONNX. ", onnx_name,
// " doesn't exist in the list of OpenVINO input tensor names");

if (!matched_names) {
continue;
}
auto ov_param_index = std::distance(ov_parameters.begin(), it);

auto shape = ov_parameters[ov_param_index].get_partial_shape();
auto type = ov_parameters[ov_param_index].get_element_type();

ParameterInfo info{onnx_name, ov_param_index, onnx_param_index, type, ParameterShape{shape}};

// Analyze shape dynamism and set flags
Expand All @@ -112,7 +115,7 @@ struct OnnxToOvNetworkBindings {
info.SetFullyDynamic(has_fully_dynamic);
info.SetBoundedDynamic(has_bounded_dynamic);
}

input_output_map.push_back(std::move(info));
}
};
Expand Down
4 changes: 3 additions & 1 deletion onnxruntime/core/providers/openvino/contexts.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ namespace fs = std::filesystem;
using config_t = std::map<std::string, ov::AnyMap>;
using reshape_t = std::map<std::string, ov::PartialShape>;
using layout_t = std::map<std::string, ov::Layout>;
using affinity_t = std::map<std::string, std::string>;

struct ProviderInfo {
std::string device_type{""}; // [device_type]: Overrides the accelerator hardware type and
Expand All @@ -43,6 +44,7 @@ struct ProviderInfo {
// it will be directly loaded.
reshape_t reshape{}; // Used for reshaping the ov input tensor shape at runtime.
layout_t layout{}; // Used for specifying the ov input/output tensor layout at runtime.
affinity_t affinity{}; // Used for specifying the nodes affinity at runtime.
std::string model_priority{"DEFAULT"}; // High-level OpenVINO model priority hint
// Defines what model should be provided with more performant
// bounded resource first
Expand All @@ -66,7 +68,7 @@ struct ProviderInfo {
const ConfigOptions* config_options{NULL};
const std::unordered_set<std::string> valid_provider_keys = {"device_type", "device_id", "device_luid", "cache_dir", "precision",
"load_config", "context", "num_of_threads", "model_priority", "num_streams", "enable_opencl_throttling", "enable_qdq_optimizer",
"enable_causallm", "disable_dynamic_shapes", "reshape_input", "layout"};
"enable_causallm", "disable_dynamic_shapes", "reshape_input", "layout", "affinity"};
};

struct RuntimeConfig {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,6 @@ common::Status OpenVINOExecutionProvider::Compile(

for (const auto& fused_node_graph : fused_nodes) {
const GraphViewer& graph_body_viewer = fused_node_graph.filtered_graph;

// Set include_embed_data to true only for the first backend manager
backend_it->TryExportCompiledBlobAsEPCtxNode(graph_body_viewer, is_first);

Expand All @@ -207,9 +206,9 @@ common::Status OpenVINOExecutionProvider::Compile(
return status;
}

#ifdef USE_OVEP_NPU_MEMORY
#ifdef USE_OVEP_NPU_MEMORY
std::vector<AllocatorPtr> OpenVINOExecutionProvider::CreatePreferredAllocators() {
if (session_context_.device_type.find("NPU") != std::string::npos) {
/* if (session_context_.device_type.find("NPU") != std::string::npos) {
AllocatorCreationInfo npu_allocator_info{
[this](OrtDevice::DeviceId device_id) {
return std::make_unique<OVRTAllocator>(
Expand All @@ -223,9 +222,9 @@ std::vector<AllocatorPtr> OpenVINOExecutionProvider::CreatePreferredAllocators()

// fill in allocator
return std::vector<AllocatorPtr>{CreateAllocator(npu_allocator_info)};
} else {
} else {*/
return std::vector<AllocatorPtr>{};
}
//}
}
#endif

Expand Down
30 changes: 29 additions & 1 deletion onnxruntime/core/providers/openvino/openvino_parser_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ reshape_t OpenVINOParserUtils::ParseInputShape(const std::string& reshape_input_

// Regular expressions for parsing
const std::regex tensor_pattern(R"(([^\[\],]+)\s*\[(.*?)\])"); // e.g. "input_1[1..5, 2, 3..4],data[1,2,3]"
// const std::regex dimension_pattern(R"(\s*(\d+(?:\.\.\d+)?)\s*)"); // e.g. "1..5", "2", "3..4"
// const std::regex dimension_pattern(R"(\s*(\d+(?:\.\.\d+)?)\s*)"); // e.g. "1..5", "2", "3..4"
const std::regex dimension_pattern(R"(\s*([^,\s]+)\s*)");
// Find all tensor shape definitions using regex
auto tensor_begin = std::sregex_iterator(
Expand Down Expand Up @@ -310,5 +310,33 @@ bool OpenVINOParserUtils::Check_Valid_Layout(const std::string& layout_str, cons
return true;
}

// Parses an affinity option string of the form
//   "CPU[node1,node2],GPU[node3]"
// into a map of node-name -> device-name.
// Both device names and node names are whitespace-trimmed so inputs like
// "CPU[a, b], GPU[c]" do not produce keys/values with stray spaces;
// empty tokens (e.g. from a trailing comma) are skipped.
affinity_t OpenVINOParserUtils::ParseAffinity(const std::string& affinity_definition) {
  LOGS_DEFAULT(INFO) << "[OpenVINO] Affinity is set : " << affinity_definition << "\n";
  affinity_t result_map;

  // Matches one "device[nodes]" group per iteration:
  //   Group 1: device name (e.g. "CPU")
  //   Group 2: comma-separated list of node names (e.g. "node1, node2")
  const std::regex device_nodes_regex(R"(([^,\[\]]+)\[([^\]]+)\])");

  auto device_it = std::sregex_iterator(affinity_definition.begin(), affinity_definition.end(), device_nodes_regex);
  const std::sregex_iterator device_end;

  for (; device_it != device_end; ++device_it) {
    const std::smatch& device_match = *device_it;
    // Trim so "CPU[a], GPU[b]" does not yield a device named " GPU".
    const std::string device_name = TrimWhitespace(device_match[1].str());
    std::stringstream nodes_list(device_match[2].str());
    std::string item;

    while (std::getline(nodes_list, item, ',')) {
      const std::string node_name = TrimWhitespace(item);
      if (!node_name.empty()) {
        result_map[node_name] = device_name;
      }
    }
  }

  return result_map;
}

} // namespace openvino_ep
} // namespace onnxruntime
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ class OpenVINOParserUtils {
static std::string TrimWhitespace(const std::string& str);
static ov::Dimension ParseDimensionRange(const std::string& range_str, const std::string& tensor_name);
static bool Check_Valid_Layout(const std::string& layout_str, const std::string& tensor_name);
static affinity_t ParseAffinity(const std::string& affinity_definition);
};

} // namespace openvino_ep
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,10 @@ static void ParseProviderInfo(const ProviderOptions& provider_options,
pi.layout = OpenVINOParserUtils::ParseLayout(provider_options.at("layout"));
}

if (provider_options.contains("affinity")) {
pi.affinity = OpenVINOParserUtils::ParseAffinity(provider_options.at("affinity"));
}

if (provider_options.contains("load_config")) {
auto parse_config = [&](const std::string& config_str) -> std::map<std::string, ov::AnyMap> {
// If the config string is empty, return an empty map and skip processing
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/core/providers/openvino/ov_interface.cc
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ std::shared_ptr<OVNetwork> OVCore::ReadModel(std::string&& model, const std::str
ov::frontend::InputModel::Ptr inputModel;

ov::AnyVector params{&modelStream, model_path};

FE = manager.load_by_model(params);
if (FE) {
inputModel = FE->load(params);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ std::vector<SupportedOp> supported_op_mode = {
{"GreaterOrEqual", V_2022_1, {"CPU", "GPU"}},
{"GridSample", V_2022_3, {"CPU"}},
{"GridSample", V_2023_0, {"GPU"}},
{"GroupQueryAttention", V_2025_0, {"CPU"}},
{"GRU", V_2024_1, {"CPU", "GPU"}},
{"HardMax", V_2023_1, {"CPU", "GPU"}},
{"Identity", V_2020_4, {"CPU", "GPU"}},
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/python/onnxruntime_pybind_state.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1083,7 +1083,7 @@ static std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory
ProviderOptions OV_provider_options_map;
const std::unordered_set<std::string> valid_provider_keys = {"device_type", "device_id", "device_luid", "cache_dir", "precision",
"load_config", "context", "num_of_threads", "model_priority", "num_streams", "enable_opencl_throttling", "enable_qdq_optimizer",
"enable_causallm", "disable_dynamic_shapes", "reshape_input", "layout"};
"enable_causallm", "disable_dynamic_shapes", "reshape_input", "layout", "affinity"};
auto it = provider_options_map.find(type);
if (it != provider_options_map.end()) {
for (auto option : it->second) {
Expand Down
1 change: 1 addition & 0 deletions onnxruntime/test/perftest/command_args_parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ ABSL_FLAG(std::string, i, "",
" [OpenVINO only] [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU device(Reduces the CPU Utilization while using GPU) \n"
" [OpenVINO only] [reshape_input]: Sets model input shapes with support for bounded dynamic dimensions using 'min..max' syntax (e.g., [1..10,3,224,224]) \n"
" [OpenVINO only] [layout]: Specifies the layout for inputs/outputs to interpret tensor dimensions correctly. \n"
" [OpenVINO only] [affinity]: Specifies the affinity of a certain node to a specific device in Hetero Mode. \n"
" [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU num_of_threads|5 enable_opencl_throttling|true reshape_input|<input_name>[1,3,60,60..100] layout|<input_name>[NCHW] cache_dir|\"<path>\"\"\n"
"\n"
" [QNN only] [backend_type]: QNN backend type. E.g., 'cpu', 'htp'. Mutually exclusive with 'backend_path'.\n"
Expand Down
4 changes: 3 additions & 1 deletion onnxruntime/test/perftest/ort_test_session.cc
Original file line number Diff line number Diff line change
Expand Up @@ -974,12 +974,14 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)");
ov_options[key] = value;
} else if (key == "layout") {
ov_options[key] = value;
} else if (key == "affinity") {
ov_options[key] = value;
} else {
ORT_THROW(
"[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO."
" ['device_type', 'device_id', 'num_of_threads', 'load_config', 'cache_dir', 'num_streams', "
"'enable_opencl_throttling', 'disable_dynamic_shapes', 'enable_qdq_optimizer',"
" 'enable_causallm', 'reshape_input', 'layout', 'model_priority'] \n");
" 'enable_causallm', 'reshape_input', 'layout', 'affinity', 'model_priority'] \n");
}
}
session_options.AppendExecutionProvider_OpenVINO_V2(ov_options);
Expand Down