Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions onnxruntime/core/providers/openvino/backend_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -292,12 +292,12 @@ bool BackendManager::ModelHasSymbolicInputDims(const onnxruntime::GraphViewer& s
}

// For dynamic models with incomplete reshape coverage, clear shapes
if (has_symbolic_dims && !all_dynamic_inputs_covered) {
/* if (has_symbolic_dims && !all_dynamic_inputs_covered) {
session_context_.reshape.clear();
LOGS_DEFAULT(WARNING) << "reshape_input does not cover all dynamic dimensions, "
<< "ignoring all provided shapes";
return true; // Model is dynamic
}
}*/

// If shapes are valid with complete coverage for dynamic model, treat as concrete
if (has_symbolic_dims && shapes_valid && all_dynamic_inputs_covered) {
Expand Down Expand Up @@ -476,7 +476,9 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
};

[[maybe_unused]] bool enable_ovep_qdq_optimizer = session_context_.enable_qdq_optimizer && IsQDQGraph(subgraph);
[[maybe_unused]] std::optional<bool> enable_compiler_qdq_optimization = queryOVProperty("NPU_QDQ_OPTIMIZATION", session_context_.device_type);
[[maybe_unused]] std::optional<bool> enable_compiler_qdq_optimization = false;
if (session_context_.device_type.find("NPU") != std::string::npos)
enable_compiler_qdq_optimization = queryOVProperty("NPU_QDQ_OPTIMIZATION", "NPU");
#if (((OPENVINO_VERSION_MAJOR == 2025) && (OPENVINO_VERSION_MINOR > 0)) || (OPENVINO_VERSION_MAJOR > 2025))
if (session_context_.device_type.find("NPU") != std::string::npos && session_context_.enable_qdq_optimizer) {
if (enable_compiler_qdq_optimization.has_value() && enable_compiler_qdq_optimization.value()) {
Expand Down
49 changes: 48 additions & 1 deletion onnxruntime/core/providers/openvino/backend_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,16 @@ bool IsCILogEnabled() {
return false;
}

// Formats a reshape map as a human-readable string for logging,
// e.g. "'input_1': [1,3,224,224], 'data': [1..5,2]".
// Note: ov::PartialShape provides an operator<< overload, so shapes are
// streamed directly.
std::string get_shapes_string(const reshape_t& shapes) {
  std::stringstream ss;
  bool first = true;  // avoids the O(n^2) ss.str().empty() check per iteration
  for (const auto& [tensor_name, shape] : shapes) {
    if (!first)
      ss << ", ";
    first = false;
    ss << "'" << tensor_name << "': " << shape;
  }
  return ss.str();
}

std::shared_ptr<const OVNetwork>
CreateOVModel(std::string&& model,
const SessionContext& session_context,
Expand All @@ -46,17 +56,27 @@ CreateOVModel(std::string&& model,
std::cout << "CreateNgraphFunc" << std::endl;
}
try {
auto ov_model = OVCore::Get()->ReadModel(std::move(model), session_context.onnx_model_path_name.string());
auto ov_model = OVCore::Get()->ReadModel(std::move(model), session_context.onnx_model_path_name.string());

if (!session_context.affinity.empty()) {
LOGS_DEFAULT(INFO) << log_tag << "Setting the ov nodes to specified affinity";
Set_Affinity(ov_model, session_context);
}

if (!session_context.reshape.empty()) {
LOGS_DEFAULT(INFO) << log_tag << "Reshaping the ov tensor to specified shape";
ov_model->reshape(session_context.reshape);
}

ov::preprocess::PrePostProcessor preproc(ov_model);
ov_model = preproc.build();


if (!session_context.layout.empty()) {
LOGS_DEFAULT(INFO) << log_tag << "Setting the ov tensor layout to specified layout";
ov_model = Set_Layout(ov_model, session_context.layout);
}

// Check for Constant Folding
if ((session_context.device_type != "NPU") && !session_context.is_wholly_supported_graph) {
ov::pass::ConstantFolding pass_const_obj;
Expand Down Expand Up @@ -141,6 +161,33 @@ std::shared_ptr<OVNetwork> Set_Layout(std::shared_ptr<OVNetwork> ov_model, const
return preproc.build();
}

void Set_Affinity(std::shared_ptr<OVNetwork> ov_model, const SessionContext& session_context) {

std::string selected_device = "CPU";
if (auto delimit = session_context.device_type.find(":"); delimit != std::string::npos) {
auto device_mode = session_context.device_type.substr(0, delimit);
if (device_mode.find("HETERO") != std::string::npos) {
const auto& devices = session_context.device_type.substr(delimit + 1);
auto delimit_comma = devices.find(",");
selected_device = devices.substr(0, delimit_comma);
} else {
ORT_THROW("[ERROR] [OpenVINO] Invalid device_type is selected. Supported modes is HETERO");
}
} else {
ORT_THROW("[ERROR] [OpenVINO] Invalid device_type is selected. Supported modes is HETERO");
}

for (auto&& ov_node : ov_model->get_ops()) {
auto name = ov_node->get_friendly_name();
auto it = session_context.affinity.find(name);
if (it != session_context.affinity.end()) {
ov_node->get_rt_info()["affinity"] = it->second;
} else {
ov_node->get_rt_info()["affinity"] = selected_device;
}
}
}

int GetFirstAvailableDevice(SessionContext& session_context) {
int i = 0;
// Get the first available VAD-M device and set the device to busy
Expand Down
4 changes: 4 additions & 0 deletions onnxruntime/core/providers/openvino/backend_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,10 @@ void printPerformanceCounts(OVInferRequestPtr request, std::ostream& stream, std

bool IsModelStreamXML(std::istream& model_stream);

void Set_Affinity(std::shared_ptr<OVNetwork> ov_model, const SessionContext& session_context);

std::string get_shapes_string(const reshape_t& shapes);

} // namespace backend_utils
} // namespace openvino_ep
} // namespace onnxruntime
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
!session_context_.so_context_enable &&
session_context_.reshape.empty() &&
session_context_.layout.empty() &&
session_context_.affinity.empty() &&
!enable_causallm &&
!eligible_for_cpu_fallback &&
auto_unified_compile);
Expand Down
13 changes: 8 additions & 5 deletions onnxruntime/core/providers/openvino/backends/basic_backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,14 +82,17 @@ struct OnnxToOvNetworkBindings {
}
}

ORT_ENFORCE(matched_names, log_tag,
"Input names mismatch between OpenVINO and ONNX. ", onnx_name,
" doesn't exist in the list of OpenVINO input tensor names");
//ORT_ENFORCE(matched_names, log_tag,
// "Input names mismatch between OpenVINO and ONNX. ", onnx_name,
// " doesn't exist in the list of OpenVINO input tensor names");

if (!matched_names) {
continue;
}
auto ov_param_index = std::distance(ov_parameters.begin(), it);

auto shape = ov_parameters[ov_param_index].get_partial_shape();
auto type = ov_parameters[ov_param_index].get_element_type();

ParameterInfo info{onnx_name, ov_param_index, onnx_param_index, type, ParameterShape{shape}};

// Analyze shape dynamism and set flags
Expand All @@ -112,7 +115,7 @@ struct OnnxToOvNetworkBindings {
info.SetFullyDynamic(has_fully_dynamic);
info.SetBoundedDynamic(has_bounded_dynamic);
}

input_output_map.push_back(std::move(info));
}
};
Expand Down
4 changes: 3 additions & 1 deletion onnxruntime/core/providers/openvino/contexts.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ namespace fs = std::filesystem;
using config_t = std::map<std::string, ov::AnyMap>;
using reshape_t = std::map<std::string, ov::PartialShape>;
using layout_t = std::map<std::string, ov::Layout>;
using affinity_t = std::map<std::string, std::string>;

struct ProviderInfo {
std::string device_type{""}; // [device_type]: Overrides the accelerator hardware type and
Expand All @@ -43,6 +44,7 @@ struct ProviderInfo {
// it will be directly loaded.
reshape_t reshape{}; // Used for reshaping the ov input tensor shape at runtime.
layout_t layout{}; // Used for specifying the ov input/output tensor layout at runtime.
affinity_t affinity{}; // Used for specifying the nodes affinity at runtime.
std::string model_priority{"DEFAULT"}; // High-level OpenVINO model priority hint
// Defines what model should be provided with more performant
// bounded resource first
Expand All @@ -66,7 +68,7 @@ struct ProviderInfo {
const ConfigOptions* config_options{NULL};
const std::unordered_set<std::string> valid_provider_keys = {"device_type", "device_id", "device_luid", "cache_dir", "precision",
"load_config", "context", "num_of_threads", "model_priority", "num_streams", "enable_opencl_throttling", "enable_qdq_optimizer",
"enable_causallm", "disable_dynamic_shapes", "reshape_input", "layout"};
"enable_causallm", "disable_dynamic_shapes", "reshape_input", "layout", "affinity"};
};

struct RuntimeConfig {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,6 @@ common::Status OpenVINOExecutionProvider::Compile(

for (const auto& fused_node_graph : fused_nodes) {
const GraphViewer& graph_body_viewer = fused_node_graph.filtered_graph;

// Set include_embed_data to true only for the first backend manager
backend_it->TryExportCompiledBlobAsEPCtxNode(graph_body_viewer, is_first);

Expand All @@ -207,9 +206,9 @@ common::Status OpenVINOExecutionProvider::Compile(
return status;
}

#ifdef USE_OVEP_NPU_MEMORY
#ifdef USE_OVEP_NPU_MEMORY
std::vector<AllocatorPtr> OpenVINOExecutionProvider::CreatePreferredAllocators() {
if (session_context_.device_type.find("NPU") != std::string::npos) {
/* if (session_context_.device_type.find("NPU") != std::string::npos) {
AllocatorCreationInfo npu_allocator_info{
[this](OrtDevice::DeviceId device_id) {
return std::make_unique<OVRTAllocator>(
Expand All @@ -223,9 +222,9 @@ std::vector<AllocatorPtr> OpenVINOExecutionProvider::CreatePreferredAllocators()

// fill in allocator
return std::vector<AllocatorPtr>{CreateAllocator(npu_allocator_info)};
} else {
} else {*/
return std::vector<AllocatorPtr>{};
}
//}
}
#endif

Expand Down
30 changes: 29 additions & 1 deletion onnxruntime/core/providers/openvino/openvino_parser_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ reshape_t OpenVINOParserUtils::ParseInputShape(const std::string& reshape_input_

// Regular expressions for parsing
const std::regex tensor_pattern(R"(([^\[\],]+)\s*\[(.*?)\])"); // e.g. "input_1[1..5, 2, 3..4],data[1,2,3]"
// const std::regex dimension_pattern(R"(\s*(\d+(?:\.\.\d+)?)\s*)"); // e.g. "1..5", "2", "3..4"
// const std::regex dimension_pattern(R"(\s*(\d+(?:\.\.\d+)?)\s*)"); // e.g. "1..5", "2", "3..4"
const std::regex dimension_pattern(R"(\s*([^,\s]+)\s*)");
// Find all tensor shape definitions using regex
auto tensor_begin = std::sregex_iterator(
Expand Down Expand Up @@ -310,5 +310,33 @@ bool OpenVINOParserUtils::Check_Valid_Layout(const std::string& layout_str, cons
return true;
}

// Parses an affinity option string of the form
//   "CPU[node1,node2],GPU[node3]"
// into a map of node-name -> device-name.
// Both device names and node names are whitespace-trimmed so inputs like
// "CPU[a, b], GPU[c]" do not produce keys/values with stray spaces;
// empty tokens (e.g. from a trailing comma) are skipped.
affinity_t OpenVINOParserUtils::ParseAffinity(const std::string& affinity_definition) {
  LOGS_DEFAULT(INFO) << "[OpenVINO] Affinity is set : " << affinity_definition << "\n";
  affinity_t result_map;

  // Matches one "device[nodes]" group per iteration:
  //   Group 1: device name (e.g. "CPU")
  //   Group 2: comma-separated list of node names (e.g. "node1, node2")
  const std::regex device_nodes_regex(R"(([^,\[\]]+)\[([^\]]+)\])");

  auto device_it = std::sregex_iterator(affinity_definition.begin(), affinity_definition.end(), device_nodes_regex);
  const std::sregex_iterator device_end;

  for (; device_it != device_end; ++device_it) {
    const std::smatch& device_match = *device_it;
    // Trim so "CPU[a], GPU[b]" does not yield a device named " GPU".
    const std::string device_name = TrimWhitespace(device_match[1].str());
    std::stringstream nodes_list(device_match[2].str());
    std::string item;

    while (std::getline(nodes_list, item, ',')) {
      const std::string node_name = TrimWhitespace(item);
      if (!node_name.empty()) {
        result_map[node_name] = device_name;
      }
    }
  }

  return result_map;
}

} // namespace openvino_ep
} // namespace onnxruntime
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ class OpenVINOParserUtils {
static std::string TrimWhitespace(const std::string& str);
static ov::Dimension ParseDimensionRange(const std::string& range_str, const std::string& tensor_name);
static bool Check_Valid_Layout(const std::string& layout_str, const std::string& tensor_name);
static affinity_t ParseAffinity(const std::string& affinity_definition);
};

} // namespace openvino_ep
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,10 @@ static void ParseProviderInfo(const ProviderOptions& provider_options,
pi.layout = OpenVINOParserUtils::ParseLayout(provider_options.at("layout"));
}

if (provider_options.contains("affinity")) {
pi.affinity = OpenVINOParserUtils::ParseAffinity(provider_options.at("affinity"));
}

if (provider_options.contains("load_config")) {
auto parse_config = [&](const std::string& config_str) -> std::map<std::string, ov::AnyMap> {
// If the config string is empty, return an empty map and skip processing
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/core/providers/openvino/ov_interface.cc
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ std::shared_ptr<OVNetwork> OVCore::ReadModel(std::string&& model, const std::str
ov::frontend::InputModel::Ptr inputModel;

ov::AnyVector params{&modelStream, model_path};

FE = manager.load_by_model(params);
if (FE) {
inputModel = FE->load(params);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ std::vector<SupportedOp> supported_op_mode = {
{"GreaterOrEqual", V_2022_1, {"CPU", "GPU"}},
{"GridSample", V_2022_3, {"CPU"}},
{"GridSample", V_2023_0, {"GPU"}},
{"GroupQueryAttention", V_2025_0, {"CPU"}},
{"GRU", V_2024_1, {"CPU", "GPU"}},
{"HardMax", V_2023_1, {"CPU", "GPU"}},
{"Identity", V_2020_4, {"CPU", "GPU"}},
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/python/onnxruntime_pybind_state.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1083,7 +1083,7 @@ static std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory
ProviderOptions OV_provider_options_map;
const std::unordered_set<std::string> valid_provider_keys = {"device_type", "device_id", "device_luid", "cache_dir", "precision",
"load_config", "context", "num_of_threads", "model_priority", "num_streams", "enable_opencl_throttling", "enable_qdq_optimizer",
"enable_causallm", "disable_dynamic_shapes", "reshape_input", "layout"};
"enable_causallm", "disable_dynamic_shapes", "reshape_input", "layout", "affinity"};
auto it = provider_options_map.find(type);
if (it != provider_options_map.end()) {
for (auto option : it->second) {
Expand Down
1 change: 1 addition & 0 deletions onnxruntime/test/perftest/command_args_parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ ABSL_FLAG(std::string, i, "",
" [OpenVINO only] [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU device(Reduces the CPU Utilization while using GPU) \n"
" [OpenVINO only] [reshape_input]: Sets model input shapes with support for bounded dynamic dimensions using 'min..max' syntax (e.g., [1..10,3,224,224]) \n"
" [OpenVINO only] [layout]: Specifies the layout for inputs/outputs to interpret tensor dimensions correctly. \n"
" [OpenVINO only] [affinity]: Specifies the affinity of a certain node to a specific device in Hetero Mode. \n"
" [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU num_of_threads|5 enable_opencl_throttling|true reshape_input|<input_name>[1,3,60,60..100] layout|<input_name>[NCHW] cache_dir|\"<path>\"\"\n"
"\n"
" [QNN only] [backend_type]: QNN backend type. E.g., 'cpu', 'htp'. Mutually exclusive with 'backend_path'.\n"
Expand Down
4 changes: 3 additions & 1 deletion onnxruntime/test/perftest/ort_test_session.cc
Original file line number Diff line number Diff line change
Expand Up @@ -974,12 +974,14 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)");
ov_options[key] = value;
} else if (key == "layout") {
ov_options[key] = value;
} else if (key == "affinity") {
ov_options[key] = value;
} else {
ORT_THROW(
"[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO."
" ['device_type', 'device_id', 'num_of_threads', 'load_config', 'cache_dir', 'num_streams', "
"'enable_opencl_throttling', 'disable_dynamic_shapes', 'enable_qdq_optimizer',"
" 'enable_causallm', 'reshape_input', 'layout', 'model_priority'] \n");
" 'enable_causallm', 'reshape_input', 'layout', 'affinity', 'model_priority'] \n");
}
}
session_options.AppendExecutionProvider_OpenVINO_V2(ov_options);
Expand Down