Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enabling L2+ Optimizations for EPs #23517

Draft
wants to merge 24 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions include/onnxruntime/core/graph/indexed_sub_graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,12 @@ struct IndexedSubGraph {
return meta_def_.get();
}

/** Gets the mutable meta definition needed to represent this subgraph as a FunctionProto.
@returns MetaDef instance if it has been set. nullptr if not. */
MetaDef* GetMutableMetaDef() {
  // Fix: not const — a const member function must not hand out mutable access to the
  // owned MetaDef; callers holding a const IndexedSubGraph would silently bypass const.
  return meta_def_.get();
}

private:
// subgraph meta definition.
std::unique_ptr<MetaDef> meta_def_;
Expand Down
6 changes: 6 additions & 0 deletions include/onnxruntime/core/optimizer/graph_transformer_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,12 @@ InlinedVector<std::unique_ptr<GraphTransformer>> GenerateTransformers(
concurrency::ThreadPool* intra_op_thread_pool = nullptr,
std::unordered_map<std::string, std::unique_ptr<Tensor>>* p_buffered_tensors = nullptr);

/** Generates the predefined graph transformers that can be applied on behalf of an
execution provider (EP) during partitioning (L2+ optimizations).
@param session_options Session options used to configure the generated transformers.
@param cpu_execution_provider CPU EP instance; required by constant folding.
@param logger Logger used for transformer diagnostics.
@returns The transformer instances, in the order they should be applied. */
InlinedVector<std::unique_ptr<GraphTransformer>> GenerateTransformersForEP(
const SessionOptions& session_options,
const IExecutionProvider& cpu_execution_provider, /*required by constant folding*/
const logging::Logger& logger);

#endif // !defined(ORT_MINIMAL_BUILD)

#if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)
Expand Down
19 changes: 19 additions & 0 deletions onnxruntime/core/framework/compute_capability.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
// Licensed under the MIT License.

#pragma once
#include <functional>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "core/common/common.h"
#include "core/graph/indexed_sub_graph.h"
#include "core/graph/graph.h"

namespace onnxruntime {
// A structure encodes a subgraph and the method to run it.
Expand All @@ -21,5 +23,22 @@

// Takes ownership of the IndexedSubGraph describing the nodes this capability covers.
ComputeCapability(std::unique_ptr<IndexedSubGraph> t_sub_graph)
: sub_graph(std::move(t_sub_graph)) {}

// Optional function to optimize this ComputeCapability.
// This will be called by ORT once the ComputeCapability is assigned to the EP.
// Signature: Status(Graph& graph, const ComputeCapability& this_optimization, ComputeCapability& cc_to_update)
// (note: the Graph parameter is mutable — the optimizer may modify the graph in place).
std::function<Status(Graph&, const ComputeCapability&, ComputeCapability&)> optimization_func;

// Optional key/value strings to configure an optimizer.
std::unordered_map<std::string, std::string> optimization_configs;

Check warning on line 33 in onnxruntime/core/framework/compute_capability.h

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Add #include <string> for string [build/include_what_you_use] [4] Raw Output: onnxruntime/core/framework/compute_capability.h:33: Add #include <string> for string [build/include_what_you_use] [4]

Check warning on line 33 in onnxruntime/core/framework/compute_capability.h

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Add #include <unordered_map> for unordered_map<> [build/include_what_you_use] [4] Raw Output: onnxruntime/core/framework/compute_capability.h:33: Add #include <unordered_map> for unordered_map<> [build/include_what_you_use] [4]

// Optional ComputeCapability instances for sets of nodes within this ComputeCapability that should be optimized.
// When an optimization is applied, ORT will update this ComputeCapability to reflect the changes made:
// IndexedSubGraph.nodes:
// - updated based on RemovedNode/AddNode calls made during optimization
// IndexedSubGraph.MetaDef (if present):
// - inputs and outputs will be unchanged
// - constant_initializers MAY change if we constant fold an initializer during optimization
std::vector<std::unique_ptr<ComputeCapability>> nodes_to_optimize;

Check warning on line 42 in onnxruntime/core/framework/compute_capability.h

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Add #include <vector> for vector<> [build/include_what_you_use] [4] Raw Output: onnxruntime/core/framework/compute_capability.h:42: Add #include <vector> for vector<> [build/include_what_you_use] [4]
};
} // namespace onnxruntime
21 changes: 19 additions & 2 deletions onnxruntime/core/framework/graph_partitioner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -280,11 +280,14 @@ static Node* PlaceNode(Graph& graph, const IndexedSubGraph& capability,
IExecutionProvider::FusionStyle fusion_style,
const std::string& provider_type,
GraphPartitioner::Mode mode,
int& fused_node_unique_id) {
int& fused_node_unique_id,
bool* subgraph_assigned_to_ep) {
Node* result = nullptr;
*subgraph_assigned_to_ep = false;

if (nullptr == capability.GetMetaDef()) {
TryAssignSingleNode(graph, capability, provider_type);
*subgraph_assigned_to_ep = true;
} else {
// The <provider> can run a fused <sub_graph> in the <graph>.

Expand Down Expand Up @@ -347,6 +350,7 @@ static Node* PlaceNode(Graph& graph, const IndexedSubGraph& capability,
}
}
}
*subgraph_assigned_to_ep = true;
}
}

Expand Down Expand Up @@ -426,7 +430,20 @@ static Status PartitionOnnxFormatModelImpl(Graph& graph, FuncManager& func_mgr,
entry->sub_graph->GetMetaDef() != nullptr;
}));
for (auto& capability : capabilities) {
Node* n = PlaceNode(graph, *capability->sub_graph, fusion_style, type, mode, fused_node_unique_id);
bool subgraph_assigned_to_ep = false;
Node* n = PlaceNode(graph, *capability->sub_graph, fusion_style, type, mode, fused_node_unique_id, &subgraph_assigned_to_ep);

// If the subgraph is assigned to the EP and the ComputeCapability has nodes_to_optimize,
// run EP related optimizations and update ComputeCapability.
if (subgraph_assigned_to_ep && !capability->nodes_to_optimize.empty()) {
for (auto& optimization_cc : capability->nodes_to_optimize) {
if (optimization_cc->optimization_func) {
optimization_cc->optimization_func(graph, *optimization_cc, *capability);
// #TODO: Handle nested optimization ComputeCapability
}
}
}

if (n != nullptr) {
// searching in kernel registries, if no kernel registered for the fused_node, use compile approach
if (!KernelRegistryManager::HasImplementationOf(kernel_registry_mgr, *n, type, logger)) {
Expand Down
15 changes: 14 additions & 1 deletion onnxruntime/core/optimizer/constant_folding.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,19 @@ ConstantFolding::ConstantFolding(const IExecutionProvider& execution_provider,
execution_provider_(execution_provider) {
}

// Protected constructor used by derived transformers (e.g. ConstantFoldingDQ) to run the
// shared constant-folding logic under the derived class's own transformer name.
// Note: members are initialized in class-declaration order, regardless of the order
// written in this initializer list.
ConstantFolding::ConstantFolding(const std::string& name,
const IExecutionProvider& execution_provider,
bool skip_dequantize_linear,
const ConfigOptions& config_options,
const InlinedHashSet<std::string_view>& compatible_execution_providers,
const InlinedHashSet<std::string>& excluded_initializers) noexcept
: GraphTransformer(name, compatible_execution_providers),
skip_dequantize_linear_(skip_dequantize_linear),
config_options_(config_options),
excluded_initializers_(excluded_initializers),
execution_provider_(execution_provider) {
}

// We need to handle a Shape node separately as the input doesn't need to be a constant initializer for
// Shape to be able to be constant folded.
static bool ConstantFoldShapeNode(Graph& graph, Node& node) {
Expand Down Expand Up @@ -144,7 +157,7 @@ Status ConstantFolding::ApplyImpl(Graph& graph, bool& modified, int graph_level,

for (NodeIndex i : order) {
auto* node = graph.GetNode(i);
if (!node) {
if (!node || !AllowConstantFolding(*node)) {
continue;
}

Expand Down
15 changes: 15 additions & 0 deletions onnxruntime/core/optimizer/constant_folding.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,21 @@ class ConstantFolding : public GraphTransformer {
const InlinedHashSet<std::string_view>& compatible_execution_providers = {},
const InlinedHashSet<std::string>& excluded_initializers = {}) noexcept;

protected:
/**
* Same as the public constructor above, but the transformer name is supplied by the
* derived class (e.g. ConstantFoldingDQ) so it appears under its own name.
*/
ConstantFolding(const std::string& name,
const IExecutionProvider& execution_provider,
bool skip_dequantize_linear,
const ConfigOptions& config_options,
const InlinedHashSet<std::string_view>& compatible_execution_providers = {},
const InlinedHashSet<std::string>& excluded_initializers = {}) noexcept;
/**
* Predicate consulted per node inside ApplyImpl; a derived class can override it to
* restrict which nodes may be constant folded. The default allows every node.
*/
virtual bool AllowConstantFolding(const Node& node) const { return true; }

private:
Status ApplyImpl(Graph& graph, bool& modified, int graph_level, const logging::Logger& logger) const override;

Expand Down
118 changes: 118 additions & 0 deletions onnxruntime/core/optimizer/graph_optimizer_registry.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
Fixed Show fixed Hide fixed
// Licensed under the MIT License.

#include "core/optimizer/graph_optimizer_registry.h"
#include "core/optimizer/graph_transformer_utils.h"
#include "core/optimizer/selection_and_optimization_func.h"
#include "core/optimizer/qdq_transformer/constant_folding_dq_node.h"

using namespace onnxruntime;

Check warning on line 9 in onnxruntime/core/optimizer/graph_optimizer_registry.cc

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Do not use namespace using-directives. Use using-declarations instead. [build/namespaces] [5] Raw Output: onnxruntime/core/optimizer/graph_optimizer_registry.cc:9: Do not use namespace using-directives. Use using-declarations instead. [build/namespaces] [5]
using namespace ::onnxruntime::common;

Check warning on line 10 in onnxruntime/core/optimizer/graph_optimizer_registry.cc

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Do not use namespace using-directives. Use using-declarations instead. [build/namespaces] [5] Raw Output: onnxruntime/core/optimizer/graph_optimizer_registry.cc:10: Do not use namespace using-directives. Use using-declarations instead. [build/namespaces] [5]

namespace onnxruntime {

// Always logs through the process-wide default logger. The borrowed references
// (CPU EP, session options) are provided later via AddCpuEpReference /
// AddSessionOptionsReference.
GraphOptimizerRegistry::GraphOptimizerRegistry() {
  logger_ = &logging::LoggingManager::DefaultLogger();
  // Fix: these raw pointers were previously left uninitialized until the
  // Add*Reference calls; reading them before that was undefined behavior
  // instead of a clean, checkable nullptr.
  cpu_ep_ = nullptr;
  session_options_ = nullptr;
}

// Pre-registers optimizer names only (map value stays nullptr); the transformer itself
// is instantiated lazily when an EP requests it via CreateOptimizer.
// @returns Status::OK() always; duplicate names are warned about and skipped.
common::Status GraphOptimizerRegistry::AddPredefinedOptimizerNames(std::vector<std::string>& optimizer_names) {
  for (const auto& name : optimizer_names) {  // const ref: avoid copying each std::string
    if (name_to_transformer_map_.find(name) != name_to_transformer_map_.end()) {
      LOGS(*logger_, WARNING) << "This transformer name is already added " << name;
      // Fix: continue with the remaining names. The original returned Status::OK() here,
      // silently dropping every name after the first duplicate.
      continue;
    }
    name_to_transformer_map_[name] = nullptr;  // instantiated only when an EP requests it

    if (name == kCONSTANT_FOLDING_DQ) {
      transformer_name_to_selection_func_[name] = ConstantFoldingDQ_selection;
    }
  }
  return Status::OK();
}

// Instantiates and registers the named predefined optimizer.
// @param name              Predefined optimizer name (currently only kCONSTANT_FOLDING_DQ).
// @param key_value_configs Optimizer configuration; currently unused, reserved for future optimizers.
// @returns Register's status for a known name; Status::OK() (with a warning) for unknown names.
common::Status GraphOptimizerRegistry::CreateOptimizer(std::string& name, std::unordered_map<std::string, std::string>& key_value_configs) {
  ORT_UNUSED_PARAMETER(key_value_configs);
  if (name == kCONSTANT_FOLDING_DQ) {
    // NOTE(review): assumes AddCpuEpReference/AddSessionOptionsReference were called
    // beforehand — cpu_ep_ and session_options_ are dereferenced here; confirm callers
    // guarantee that ordering.
    const InlinedHashSet<NodeIndex> node_index_set = {};
    auto transformer = std::make_unique<ConstantFoldingDQ>(*cpu_ep_, false /*skip_dequantize_linear*/,
                                                           session_options_->config_options, node_index_set);
    // Fix: call Register() on this instance instead of re-entering the singleton via
    // Get(), and propagate its Status instead of discarding it.
    return Register(std::move(transformer));
  }

  LOGS(*logger_, WARNING) << "Can't create optimizer for " << name << ". It's not in the predefined optimizer list.";
  return Status::OK();
}

// Takes ownership of |transformer| and records it under transformer->Name().
// If an instance with the same name is already created, the incoming transformer is
// discarded (its unique_ptr is destroyed on return) and Status::OK() is still returned.
common::Status GraphOptimizerRegistry::Register(std::unique_ptr<GraphTransformer> transformer) {
const auto& name = transformer->Name();
// A map entry with a null value means only the name was pre-registered
// (see AddPredefinedOptimizerNames); in that case we proceed and fill it in.
if (name_to_transformer_map_.find(name) != name_to_transformer_map_.end() &&
name_to_transformer_map_.at(name)) {
LOGS(*logger_, WARNING) << "This optimizer is already created and registered " << name;
return Status::OK();
}

// Store a raw pointer for lookup; lifetime is owned by transformer_list_.
name_to_transformer_map_[name] = transformer.get();
transformer_list_.push_back(std::move(transformer));

return Status::OK();
}

// Returns the node-selection function registered for |name|, or std::nullopt (with a
// warning) when no selection function is known for that optimizer.
std::optional<std::function<std::vector<std::unique_ptr<ComputeCapability>>(const GraphViewer&)>> GraphOptimizerRegistry::GetSelectionFunc(std::string& name) const {
  auto lookup = transformer_name_to_selection_func_.find(name);
  if (lookup != transformer_name_to_selection_func_.end()) {
    // Fix: reuse the iterator from find() instead of a second at() lookup,
    // which hashed and compared the key again.
    return lookup->second;
  }
  LOGS(*logger_, WARNING) << "Can't find selection function of " << name;
  return std::nullopt;
}

// Returns the created transformer registered under |name|, or nullptr when it is
// unknown or only name-registered (map value still nullptr).
GraphTransformer* GraphOptimizerRegistry::GetTransformerByName(std::string& name) const {
  // Fix: single find() instead of find() followed by at() (double hash/compare).
  auto it = name_to_transformer_map_.find(name);
  return it != name_to_transformer_map_.end() ? it->second : nullptr;
}

// Create and register all the predefined transformers for EPs.
// @param sess_options Session options used to configure the transformers.
// @param cpu_ep       CPU EP instance, required by constant folding.
// @param logger       Logger for transformer diagnostics.
common::Status GraphOptimizerRegistry::AddPredefinedOptimizers(
    const onnxruntime::SessionOptions& sess_options,
    const onnxruntime::IExecutionProvider& cpu_ep,
    const logging::Logger& logger) {
  // TODO(anyone): Apply optimization level here if we later decide to do so.
  // Fix: dropped the immediately-invoked lambda wrapper, which added nothing.
  auto transformers_to_register = optimizer_utils::GenerateTransformersForEP(sess_options, cpu_ep, logger);

  for (auto& entry : transformers_to_register) {
    // Register on this instance directly rather than re-entering the singleton via Get().
    ORT_RETURN_IF_ERROR(Register(std::move(entry)));
  }
  return Status::OK();
}

// Looks up the transformer registered under |name| and applies it to |graph| in place.
// Fails if no transformer with that name has been created and registered.
common::Status GraphOptimizerRegistry::ApplyTransformer(Graph& graph, std::string& name,
                                                        const logging::Logger& logger) const {
  GraphTransformer* transformer = GetTransformerByName(name);
  if (transformer == nullptr) {
    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "This transformer is not registered " + name);
  }

  // Apply mutates the graph directly; the 'modified' flag is not surfaced to callers.
  bool modified = false;
  ORT_RETURN_IF_ERROR(transformer->Apply(graph, modified, logger));

  return Status::OK();
}

// Stores a non-owning pointer to the CPU EP (needed by e.g. ConstantFoldingDQ).
// The caller (InferenceSession) must keep the EP alive for the registry's lifetime.
common::Status GraphOptimizerRegistry::AddCpuEpReference(onnxruntime::IExecutionProvider* cpu_ep) {
cpu_ep_ = cpu_ep;
return Status::OK();
}

// Stores a non-owning pointer to the session options (read by e.g. ConstantFoldingDQ
// for config_options). The caller must keep it alive for the registry's lifetime.
common::Status GraphOptimizerRegistry::AddSessionOptionsReference(onnxruntime::SessionOptions* session_options) {
session_options_ = session_options;
return Status::OK();
}

// Initialize static members
std::shared_ptr<GraphOptimizerRegistry> onnxruntime::GraphOptimizerRegistry::graph_optimizer_registry = nullptr;

Check warning on line 116 in onnxruntime/core/optimizer/graph_optimizer_registry.cc

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Add #include <memory> for shared_ptr<> [build/include_what_you_use] [4] Raw Output: onnxruntime/core/optimizer/graph_optimizer_registry.cc:116: Add #include <memory> for shared_ptr<> [build/include_what_you_use] [4]
std::mutex GraphOptimizerRegistry::registry_mutex;
} // namespace onnxruntime
105 changes: 105 additions & 0 deletions onnxruntime/core/optimizer/graph_optimizer_registry.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once

#include <functional>
#include <memory>
#include <mutex>
#include <optional>
#include <string>
#include <unordered_map>
#include <vector>

#include "core/common/inlined_containers.h"
#include "core/common/logging/logging.h"
#include "core/optimizer/graph_transformer.h"
#include "core/framework/execution_providers.h"
#include "core/framework/compute_capability.h"

namespace onnxruntime {
/**
 * A registration/lookup class for re-usable graph optimizers applied on behalf of
 * execution providers (EPs).
 *
 * The CPU EP and session-options pointers are borrowed (non-owning); the providing
 * InferenceSession must outlive this registry.
 */
class GraphOptimizerRegistry {
 public:
  GraphOptimizerRegistry();
  GraphOptimizerRegistry(const GraphOptimizerRegistry&) = delete;
  GraphOptimizerRegistry& operator=(const GraphOptimizerRegistry&) = delete;

  /**
   * Get GraphOptimizerRegistry instance as a singleton.
   *
   * Fix: the previous double-checked locking read graph_optimizer_registry outside the
   * mutex — an unsynchronized read of a non-atomic shared_ptr is a data race. Creation
   * happens once, so unconditionally taking the lock is both correct and cheap.
   */
  static std::shared_ptr<GraphOptimizerRegistry> Get() {
    std::lock_guard<std::mutex> lock(registry_mutex);
    if (!graph_optimizer_registry) {
      graph_optimizer_registry = std::make_shared<GraphOptimizerRegistry>();
    }
    return graph_optimizer_registry;
  }

  /**
   * Register all the predefined optimizer names, only the name not the optimizer instance.
   *
   * The optimizer will later be instantiated only when an EP requests it by calling
   * GetOptimizerByName in the provider bridge.
   *
   * Fix (here and below): removed the ill-formed "GraphOptimizerRegistry::" extra
   * qualification, which is not allowed on in-class member declarations.
   */
  common::Status AddPredefinedOptimizerNames(std::vector<std::string>& optimizer_names);

  /**
   * Create and register all predefined optimizers.
   */
  common::Status AddPredefinedOptimizers(const onnxruntime::SessionOptions& sess_options,
                                         const onnxruntime::IExecutionProvider& cpu_ep,
                                         const logging::Logger& logger);

  /**
   * Create and register the named optimizer, configured via key/value strings.
   */
  common::Status CreateOptimizer(std::string& name, std::unordered_map<std::string, std::string>& key_value_configs);

  /**
   * Get optimizer by name. Returns nullptr if it has not been created and registered.
   */
  GraphTransformer* GetTransformerByName(std::string& name) const;

  /**
   * Run the named optimizer on the graph.
   */
  common::Status ApplyTransformer(Graph& graph, std::string& name,
                                  const logging::Logger& logger) const;

  /**
   * Register an optimizer instance; ownership is transferred to the registry.
   */
  common::Status Register(std::unique_ptr<GraphTransformer> transformer);

  /**
   * Get optimizer selection function. If the optimizer name can't be found, return nullopt.
   */
  std::optional<std::function<std::vector<std::unique_ptr<ComputeCapability>>(const GraphViewer&)>> GetSelectionFunc(std::string& name) const;

  /**
   * Add CPU EP reference from InferenceSession as it's needed for some optimizers, ex: ConstantFoldingDQ.
   */
  common::Status AddCpuEpReference(onnxruntime::IExecutionProvider* cpu_ep);

  /**
   * Get CPU EP reference; nullptr until AddCpuEpReference has been called.
   */
  onnxruntime::IExecutionProvider* GetCpuEpReference() const { return cpu_ep_; }

  /**
   * Add session options reference from InferenceSession as it's needed for some optimizers, ex: ConstantFoldingDQ.
   */
  common::Status AddSessionOptionsReference(onnxruntime::SessionOptions* session_options);

  /**
   * Get Session Options reference; nullptr until AddSessionOptionsReference has been called.
   */
  onnxruntime::SessionOptions* GetSessionOptionsReference() const { return session_options_; }

 private:
  InlinedVector<std::unique_ptr<GraphTransformer>> transformer_list_;
  InlinedHashMap<std::string, GraphTransformer*> name_to_transformer_map_;
  InlinedHashMap<std::string, std::function<std::vector<std::unique_ptr<ComputeCapability>>(const GraphViewer&)>> transformer_name_to_selection_func_;
  const logging::Logger* logger_ = nullptr;
  // Borrowed, non-owning; null until the corresponding Add*Reference call
  // (fix: previously these raw pointers had no initializer at all).
  onnxruntime::IExecutionProvider* cpu_ep_ = nullptr;
  onnxruntime::SessionOptions* session_options_ = nullptr;

  static std::shared_ptr<GraphOptimizerRegistry> graph_optimizer_registry;
  static std::mutex registry_mutex;
};
}  // namespace onnxruntime
Loading
Loading