-
Notifications
You must be signed in to change notification settings - Fork 57
Don't embed external initializers into the proto to avoid 2GB limit #817
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
30e9d5f
70c2f37
80de8ef
ef6f23d
165a661
fe2cf8c
e6727b1
033b6f9
d4e41c9
cba53b3
8a5fe0e
2a0fa0a
a345737
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -21,6 +21,7 @@ | |||||
| #include "core/providers/openvino/ov_versions/capability.h" | ||||||
| #include "core/providers/openvino/qdq_transformations/qdq_stripping.h" | ||||||
| #include "core/providers/openvino/qdq_transformations/qdq_scales_fix.h" | ||||||
| #include "../../framework/tensorprotoutils.h" | ||||||
|
|
||||||
| namespace onnxruntime { | ||||||
| namespace openvino_ep { | ||||||
|
|
@@ -453,6 +454,80 @@ static void DumpOpenVINOEPModel([[maybe_unused]] const std::filesystem::path& on | |||||
| #endif | ||||||
| } | ||||||
|
|
||||||
| // this is a helper function to set the data fields, it duplicates ExternalDataInfo::SetExternalLocationToProto | ||||||
| // but we cannot use that function as it is not part of public provider api. | ||||||
| static void SetExternalDataFields(ONNX_NAMESPACE::TensorProto* proto_init, const void* data_ptr, int64_t data_size) { | ||||||
| static constexpr const char* ORT_INTERNAL_MEM_INITIALIZER = "*/_ORT_MEM_ADDR_/*"; | ||||||
| auto* external_data = proto_init->mutable_external_data(); | ||||||
| bool found_location = false, found_offset = false, found_length = false; | ||||||
| const int ext_data_size = external_data->size(); | ||||||
| proto_init->set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation::TensorProto_DataLocation_EXTERNAL); | ||||||
|
|
||||||
| for (int j = 0; j < ext_data_size; ++j) { | ||||||
| auto& ext_entry = external_data->at(j); | ||||||
| auto& key = *ext_entry.mutable_key(); | ||||||
| if (key == "location") { | ||||||
| *ext_entry.mutable_value() = ORT_INTERNAL_MEM_INITIALIZER; | ||||||
| found_location = true; | ||||||
| } else if (key == "offset") { | ||||||
| *ext_entry.mutable_value() = std::to_string(reinterpret_cast<uintptr_t>(data_ptr)); | ||||||
| found_offset = true; | ||||||
| } else if (key == "length") { | ||||||
| *ext_entry.mutable_value() = std::to_string(data_size); | ||||||
| found_length = true; | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| if (!found_location) { | ||||||
| auto* new_entry = external_data->Add(); | ||||||
| *new_entry->mutable_key() = "location"; | ||||||
| *new_entry->mutable_value() = ORT_INTERNAL_MEM_INITIALIZER; | ||||||
| } | ||||||
| if (!found_offset) { | ||||||
| auto* new_entry = external_data->Add(); | ||||||
| *new_entry->mutable_key() = "offset"; | ||||||
| *new_entry->mutable_value() = std::to_string(reinterpret_cast<uintptr_t>(data_ptr)); | ||||||
| } | ||||||
| if (!found_length) { | ||||||
| auto* new_entry = external_data->Add(); | ||||||
| *new_entry->mutable_key() = "length"; | ||||||
| *new_entry->mutable_value() = std::to_string(data_size); | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| static void ReadExternalDataFields(const ONNX_NAMESPACE::TensorProto* src_init, std::string& location, size_t& offset, size_t& length) { | ||||||
| // Remove constness as we need to use mutable_external_data() to get the entries to read. | ||||||
| // The entries themselves are not modified... | ||||||
| auto& mutable_proto = *const_cast<ONNX_NAMESPACE::TensorProto*>(src_init); | ||||||
| auto* entry_protos = mutable_proto.mutable_external_data(); | ||||||
| for (int i = 0; i < entry_protos->size(); i++) { | ||||||
| auto& string_entry_proto{entry_protos->at(i)}; | ||||||
| const auto& pb_key{*(string_entry_proto.mutable_key())}; | ||||||
| const auto& pb_value{*(string_entry_proto.mutable_value())}; | ||||||
| if (pb_key == "location") { | ||||||
| location = pb_value; | ||||||
| } else if (pb_key == "offset") { | ||||||
| const auto res = std::from_chars(pb_value.data(), pb_value.data() + pb_value.size(), offset); | ||||||
| if (res.ec != std::errc()) { | ||||||
| std::ostringstream err_msg; | ||||||
| err_msg << "External data in memory has invalid offset field: " | ||||||
| << src_init->name() << "], location: " << location | ||||||
| << ", offset: " << pb_value; | ||||||
| ORT_THROW(err_msg.str()); | ||||||
| } | ||||||
| } else if (pb_key == "length") { | ||||||
| const auto res = std::from_chars(pb_value.data(), pb_value.data() + pb_value.size(), length); | ||||||
| if (res.ec != std::errc()) { | ||||||
| std::ostringstream err_msg; | ||||||
| err_msg << "External data in memory has invalid length field: " | ||||||
| << src_init->name() << "], location: " << location | ||||||
| << ", length: " << pb_value; | ||||||
| ORT_THROW(err_msg.str()); | ||||||
| } | ||||||
| } | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| std::unique_ptr<ONNX_NAMESPACE::ModelProto> | ||||||
| BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, | ||||||
| const onnxruntime::GraphViewer& subgraph, | ||||||
|
|
@@ -529,12 +604,92 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, | |||||
| return model_proto; | ||||||
| } else { | ||||||
| LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP QDQ optimization pass is disabled"; | ||||||
|
|
||||||
| // scan ext initializers: | ||||||
| std::unordered_map<std::string, std::pair<size_t, size_t>> external_initializers_offset_and_length; | ||||||
| std::string tempLocation; | ||||||
| size_t extInitializerTotalSize = 0; | ||||||
| if (session_context_.has_external_weights) { | ||||||
| auto allInitializers = subgraph.GetAllInitializedTensors(); | ||||||
| for (auto& [name, tp] : allInitializers) { | ||||||
| if (utils::HasExternalDataInMemory(*tp)) { | ||||||
| size_t offset = 0; | ||||||
| size_t length = 0; | ||||||
| ReadExternalDataFields(tp, tempLocation, offset, length); | ||||||
| extInitializerTotalSize += length; | ||||||
| external_initializers_offset_and_length[name] = {offset, length}; | ||||||
| } | ||||||
| } | ||||||
intbf marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
| } | ||||||
|
|
||||||
| // when we have external weights in memory, the model proto will actually embed those | ||||||
| // and bloat the serialized string. We can avoid that by not including the data in the proto | ||||||
| // but then we have to update those initializers and set the external_data fields to mem_addr tag... | ||||||
| // proto is limited to 2GB, but let's use 512MB as threshold to be conservative and still gain some memory reductions. | ||||||
| constexpr size_t MAX_EMBEDDED_INITIALIZER_SIZE = 1024 * 1024 * 512; | ||||||
| const bool include_initializer_data_in_proto = !(session_context_.has_external_weights == true && | ||||||
intbf marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
MayureshV1 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
MayureshV1 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
| external_initializers_offset_and_length.size() > 1 && | ||||||
| extInitializerTotalSize > MAX_EMBEDDED_INITIALIZER_SIZE); | ||||||
|
|
||||||
| auto model = subgraph.CreateModel(logger); | ||||||
| auto model_proto = model->ToProto(); | ||||||
| model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION); | ||||||
| subgraph.ToProto(*model_proto->mutable_graph(), true, true); | ||||||
| subgraph.ToProto(*model_proto->mutable_graph(), /*include_initializers*/true, | ||||||
| /*include_outer_scope_args*/true, /*execution_order*/0, /*include_initializer_data*/include_initializer_data_in_proto); | ||||||
|
Comment on lines
+642
to
+643
|
||||||
|
|
||||||
|
||||||
intbf marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
intbf marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
Copilot
AI
Oct 4, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Casting offset (which is a size_t representing a memory offset) directly to (const void*) is incorrect. The offset should be added to a base pointer to get the actual memory address, not used as a pointer itself.
| SetExternalDataFields(proto_init, (const void*)offset, length); | |
| SetExternalDataFields(proto_init, static_cast<const void*>(static_cast<const uint8_t*>(external_initializers_data) + offset), length); |
Uh oh!
There was an error while loading. Please reload this page.