-
Notifications
You must be signed in to change notification settings - Fork 55
Don't embed external initializers into the proto to avoid 2GB limit #817
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
30e9d5f
70c2f37
80de8ef
ef6f23d
165a661
fe2cf8c
e6727b1
033b6f9
d4e41c9
cba53b3
8a5fe0e
2a0fa0a
a345737
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -21,6 +21,7 @@ | |||||
#include "core/providers/openvino/ov_versions/capability.h" | ||||||
#include "core/providers/openvino/qdq_transformations/qdq_stripping.h" | ||||||
#include "core/providers/openvino/qdq_transformations/qdq_scales_fix.h" | ||||||
#include "../../framework/tensorprotoutils.h" | ||||||
|
||||||
namespace onnxruntime { | ||||||
namespace openvino_ep { | ||||||
|
@@ -453,6 +454,80 @@ static void DumpOpenVINOEPModel([[maybe_unused]] const std::filesystem::path& on | |||||
#endif | ||||||
} | ||||||
|
||||||
// Marks `proto_init` as an externally stored tensor whose payload lives in process memory.
// The data location is switched to EXTERNAL and the three external_data entries are populated:
//   "location" -> the ORT in-memory marker string,
//   "offset"   -> the numeric value of `data_ptr`,
//   "length"   -> `data_size`.
// Entries that already exist are overwritten in place; missing ones are appended in the order
// location, offset, length.
// This duplicates ExternalDataInfo::SetExternalLocationToProto, which cannot be used here because
// it is not part of the public provider api.
static void SetExternalDataFields(ONNX_NAMESPACE::TensorProto* proto_init, const void* data_ptr, int64_t data_size) {
  static constexpr const char* ORT_INTERNAL_MEM_INITIALIZER = "*/_ORT_MEM_ADDR_/*";

  struct FieldSpec {
    const char* key;
    std::string value;
    bool present;
  };
  // Declaration order here is also the order in which missing entries get appended.
  FieldSpec fields[] = {
      {"location", ORT_INTERNAL_MEM_INITIALIZER, false},
      {"offset", std::to_string(reinterpret_cast<uintptr_t>(data_ptr)), false},
      {"length", std::to_string(data_size), false},
  };

  proto_init->set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation::TensorProto_DataLocation_EXTERNAL);

  auto* entries = proto_init->mutable_external_data();
  const int existing_count = entries->size();

  // Pass 1: overwrite the value of every pre-existing entry whose key we own.
  for (int idx = 0; idx < existing_count; ++idx) {
    auto& entry = entries->at(idx);
    const auto& entry_key = *entry.mutable_key();
    for (auto& field : fields) {
      if (entry_key == field.key) {
        *entry.mutable_value() = field.value;
        field.present = true;
        break;
      }
    }
  }

  // Pass 2: append whatever was not found above.
  for (const auto& field : fields) {
    if (field.present) {
      continue;
    }
    auto* added = entries->Add();
    *added->mutable_key() = field.key;
    *added->mutable_value() = field.value;
  }
}
|
||||||
// Reads the "location", "offset" and "length" external_data entries of `src_init`.
//
// @param src_init  Tensor proto whose external_data entries are inspected (entries are not modified).
// @param location  Receives the value of the "location" entry, if present.
// @param offset    Receives the parsed value of the "offset" entry, if present.
// @param length    Receives the parsed value of the "length" entry, if present.
//
// Output parameters are written only for keys that are actually present, so callers should
// initialize them before the call. Throws via ORT_THROW when "offset" or "length" is not a
// complete unsigned decimal number (empty, non-numeric, out of range, or with trailing characters).
static void ReadExternalDataFields(const ONNX_NAMESPACE::TensorProto* src_init, std::string& location, size_t& offset, size_t& length) {
  // Remove constness as we need to use mutable_external_data() to get the entries to read.
  // The entries themselves are not modified...
  auto& mutable_proto = *const_cast<ONNX_NAMESPACE::TensorProto*>(src_init);
  auto* entry_protos = mutable_proto.mutable_external_data();

  // Parses `text` into `out`. The whole string must be consumed: checking only `ec` would
  // silently accept a value such as "123abc" as 123.
  const auto parse_size_field = [&](const std::string& text, const char* field_name, size_t& out) {
    const char* const first = text.data();
    const char* const last = first + text.size();
    size_t parsed = 0;
    const auto res = std::from_chars(first, last, parsed);
    if (res.ec != std::errc() || res.ptr != last) {
      std::ostringstream err_msg;
      // `location` is whatever has been read so far; it may still be empty if the
      // "location" entry comes later in the list.
      err_msg << "External data in memory has invalid " << field_name << " field: ["
              << src_init->name() << "], location: " << location
              << ", " << field_name << ": " << text;
      ORT_THROW(err_msg.str());
    }
    out = parsed;
  };

  for (int i = 0; i < entry_protos->size(); i++) {
    auto& string_entry_proto{entry_protos->at(i)};
    const auto& pb_key{*(string_entry_proto.mutable_key())};
    const auto& pb_value{*(string_entry_proto.mutable_value())};
    if (pb_key == "location") {
      location = pb_value;
    } else if (pb_key == "offset") {
      parse_size_field(pb_value, "offset", offset);
    } else if (pb_key == "length") {
      parse_size_field(pb_value, "length", length);
    }
  }
}
|
||||||
std::unique_ptr<ONNX_NAMESPACE::ModelProto> | ||||||
BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, | ||||||
const onnxruntime::GraphViewer& subgraph, | ||||||
|
@@ -529,12 +604,98 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, | |||||
return model_proto; | ||||||
} else { | ||||||
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP QDQ optimization pass is disabled"; | ||||||
|
||||||
// scan ext initializers: | ||||||
std::unordered_map<std::string, std::pair<size_t, size_t>> external_initializers_offset_and_length; | ||||||
std::string tempLocation; | ||||||
size_t extInitializerTotalSize = 0; | ||||||
if (session_context_.has_external_weights) { | ||||||
auto allInitializers = subgraph.GetAllInitializedTensors(); | ||||||
for (auto& [name, tp] : allInitializers) { | ||||||
if (utils::HasExternalDataInMemory(*tp)) { | ||||||
size_t offset = 0; | ||||||
size_t length = 0; | ||||||
ReadExternalDataFields(tp, tempLocation, offset, length); | ||||||
extInitializerTotalSize += length; | ||||||
external_initializers_offset_and_length[name] = {offset, length}; | ||||||
} | ||||||
} | ||||||
} | ||||||
|
||||||
// when we have external weights in memory, the model proto will actually embed those | ||||||
// and bloat the serialized string. We can avoid that by not including the data in the proto | ||||||
// but then we have to update those initializers and set the external_data fields to mem_addr tag... | ||||||
// proto is limited to 2GB, but let's use 32MB as threshold to be conservative and still gain some memory reductions. | ||||||
#if (((OPENVINO_VERSION_MAJOR == 2025) && (OPENVINO_VERSION_MINOR > 3)) || (OPENVINO_VERSION_MAJOR > 2025)) | ||||||
constexpr size_t MAX_EMBEDDED_INITIALIZER_SIZE = 1024 * 1024 * 32; | ||||||
const bool include_initializer_data_in_proto = !(session_context_.has_external_weights && | ||||||
external_initializers_offset_and_length.size() > 1 && | ||||||
extInitializerTotalSize >= MAX_EMBEDDED_INITIALIZER_SIZE); | ||||||
#else | ||||||
const bool include_initializer_data_in_proto = true; | ||||||
#endif | ||||||
|
||||||
|
||||||
auto model = subgraph.CreateModel(logger); | ||||||
auto model_proto = model->ToProto(); | ||||||
model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION); | ||||||
subgraph.ToProto(*model_proto->mutable_graph(), true, true); | ||||||
subgraph.ToProto(*model_proto->mutable_graph(), /*include_initializers*/true, | ||||||
/*include_outer_scope_args*/true, /*execution_order*/0, /*include_initializer_data*/include_initializer_data_in_proto); | ||||||
Comment on lines
+642
to
+643
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The `execution_order` parameter is hardcoded to 0. Consider using a named constant or adding a comment explaining why 0 is the appropriate default value. Copilot uses AI. Check for mistakes. Positive Feedback / Negative Feedback |
||||||
|
||||||
print_model_proto_duration(); | ||||||
|
||||||
if (!include_initializer_data_in_proto) { | ||||||
LOGS(logger, INFO) << "Initializer data is not included in the model proto. Updating metadata..., total size " << extInitializerTotalSize / (1024 * 1024) << " MB in " << external_initializers_offset_and_length.size() << " initializers"; | ||||||
auto* graph_proto = model_proto->mutable_graph(); | ||||||
auto* proto_initializers = graph_proto->mutable_initializer(); | ||||||
|
||||||
std::unordered_map<std::string, ONNX_NAMESPACE::TensorProto*> proto_initializer_map; | ||||||
for (int i = 0, n = proto_initializers->size(); i < n; ++i) { | ||||||
auto& proto_init = proto_initializers->at(i); | ||||||
proto_initializer_map[proto_init.name()] = &proto_init; | ||||||
} | ||||||
|
||||||
for (const auto& [name, src_init] : subgraph.GetAllInitializedTensors()) { | ||||||
auto it = proto_initializer_map.find(name); | ||||||
if (it == proto_initializer_map.end()) | ||||||
continue; | ||||||
|
||||||
auto* proto_init = it->second; | ||||||
|
||||||
// If the proto initializer is missing data, fill it in | ||||||
if (!proto_init->has_raw_data() && src_init->has_raw_data()) { | ||||||
*proto_init->mutable_raw_data() = src_init->raw_data(); | ||||||
} | ||||||
|
||||||
// Only set in-memory external_data fields if the data is in memory | ||||||
if (src_init->has_raw_data()) { | ||||||
LOGS(logger, VERBOSE) << "In-memory initializer RAW: " | ||||||
<< src_init->name() | ||||||
<< ", data_type: " << src_init->data_type() | ||||||
<< ", raw_data size: " << src_init->raw_data().size(); | ||||||
|
||||||
SetExternalDataFields(proto_init, src_init->raw_data().data(), src_init->raw_data().size()); | ||||||
} else if (onnxruntime::utils::HasExternalDataInMemory(*src_init)) { | ||||||
auto it_ext = external_initializers_offset_and_length.find(name); | ||||||
if (it_ext == external_initializers_offset_and_length.end()) { | ||||||
std::ostringstream err_msg; | ||||||
err_msg << "Initializer marked as external in memory but missing offset/length info: " << src_init->name(); | ||||||
ORT_THROW(err_msg.str()); | ||||||
} | ||||||
const size_t offset = it_ext->second.first; | ||||||
const size_t length = it_ext->second.second; | ||||||
|
||||||
LOGS(logger, VERBOSE) << "In-memory initializer EXT: " << src_init->name() << ", size: " << length; | ||||||
|
||||||
SetExternalDataFields(proto_init, (const void*)offset, length); | ||||||
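There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Casting `offset` to `const void*` with a C-style cast hides an integer-to-pointer conversion; prefer an explicit `reinterpret_cast`.
Suggested change: `SetExternalDataFields(proto_init, reinterpret_cast<const void*>(offset), length);`
Copilot uses AI. Check for mistakes. Positive Feedback / Negative Feedback |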
||||||
} else { | ||||||
LOGS(logger, VERBOSE) << "File-based initializer: " << src_init->name() << ", data_type: " << src_init->data_type(); | ||||||
} | ||||||
} | ||||||
} | ||||||
|
||||||
DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node); | ||||||
|
||||||
return model_proto; | ||||||
} | ||||||
} | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The condition `external_initializers_offset_and_length.size() > 1` seems arbitrary. Consider using a named constant or documenting why specifically more than 1 external initializer is required to trigger this optimization.
Copilot uses AI. Check for mistakes.