Skip to content

Commit 7eafcb3

Browse files
committed
Revert "Don't embed external initializers into the proto to avoid 2GB limit (#817)"
This reverts commit f8b0904.
1 parent f8b0904 commit 7eafcb3

File tree

2 files changed

+1
-377
lines changed

2 files changed

+1
-377
lines changed

onnxruntime/core/providers/openvino/backend_manager.cc

Lines changed: 1 addition & 162 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
#include "core/providers/openvino/ov_versions/capability.h"
2222
#include "core/providers/openvino/qdq_transformations/qdq_stripping.h"
2323
#include "core/providers/openvino/qdq_transformations/qdq_scales_fix.h"
24-
#include "../../framework/tensorprotoutils.h"
2524

2625
namespace onnxruntime {
2726
namespace openvino_ep {
@@ -454,80 +453,6 @@ static void DumpOpenVINOEPModel([[maybe_unused]] const std::filesystem::path& on
454453
#endif
455454
}
456455

457-
// this is a helper function to set the data fields, it duplicates ExternalDataInfo::SetExternalLocationToProto
458-
// but we cannot use that function as it is not part of public provider api.
459-
static void SetExternalDataFields(ONNX_NAMESPACE::TensorProto* proto_init, const void* data_ptr, int64_t data_size) {
460-
static constexpr const char* ORT_INTERNAL_MEM_INITIALIZER = "*/_ORT_MEM_ADDR_/*";
461-
auto* external_data = proto_init->mutable_external_data();
462-
bool found_location = false, found_offset = false, found_length = false;
463-
const int ext_data_size = external_data->size();
464-
proto_init->set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation::TensorProto_DataLocation_EXTERNAL);
465-
466-
for (int j = 0; j < ext_data_size; ++j) {
467-
auto& ext_entry = external_data->at(j);
468-
auto& key = *ext_entry.mutable_key();
469-
if (key == "location") {
470-
*ext_entry.mutable_value() = ORT_INTERNAL_MEM_INITIALIZER;
471-
found_location = true;
472-
} else if (key == "offset") {
473-
*ext_entry.mutable_value() = std::to_string(reinterpret_cast<uintptr_t>(data_ptr));
474-
found_offset = true;
475-
} else if (key == "length") {
476-
*ext_entry.mutable_value() = std::to_string(data_size);
477-
found_length = true;
478-
}
479-
}
480-
481-
if (!found_location) {
482-
auto* new_entry = external_data->Add();
483-
*new_entry->mutable_key() = "location";
484-
*new_entry->mutable_value() = ORT_INTERNAL_MEM_INITIALIZER;
485-
}
486-
if (!found_offset) {
487-
auto* new_entry = external_data->Add();
488-
*new_entry->mutable_key() = "offset";
489-
*new_entry->mutable_value() = std::to_string(reinterpret_cast<uintptr_t>(data_ptr));
490-
}
491-
if (!found_length) {
492-
auto* new_entry = external_data->Add();
493-
*new_entry->mutable_key() = "length";
494-
*new_entry->mutable_value() = std::to_string(data_size);
495-
}
496-
}
497-
498-
// Reads the "location", "offset" and "length" entries from the external_data list of `src_init`.
//
// Outputs (each one is written only when its key is present; otherwise it is left unchanged):
//   location - value of the "location" entry, copied verbatim.
//   offset   - value of the "offset" entry, parsed as an unsigned decimal integer.
//   length   - value of the "length" entry, parsed as an unsigned decimal integer.
//
// Throws (via ORT_THROW) when an "offset" or "length" value is not, in its entirety, a decimal
// number that fits in size_t. Empty text, non-digits, trailing characters and out-of-range
// values are all rejected, and the corresponding output is not modified in that case.
static void ReadExternalDataFields(const ONNX_NAMESPACE::TensorProto* src_init, std::string& location, size_t& offset, size_t& length) {
  // Parses all of `text` into `out`, or throws naming the offending `field`.
  // `location` is reported as read so far, so it may be empty if that entry comes later.
  const auto parse_or_throw = [&](const std::string& text, const char* field, size_t& out) {
    const char* const first = text.data();
    const char* const last = first + text.size();
    size_t parsed = 0;
    const auto res = std::from_chars(first, last, parsed);
    // from_chars stops at the first non-digit without reporting an error, so a clean
    // error code alone does not prove the whole value was numeric.
    if (res.ec != std::errc() || res.ptr != last) {
      std::ostringstream err_msg;
      err_msg << "External data in memory has invalid " << field << " field: ["
              << src_init->name() << "], location: " << location
              << ", " << field << ": " << text;
      ORT_THROW(err_msg.str());
    }
    out = parsed;
  };

  // Remove constness as we need to use mutable_external_data() to get the entries to read.
  // The entries themselves are not modified.
  auto& mutable_proto = *const_cast<ONNX_NAMESPACE::TensorProto*>(src_init);
  auto* entry_protos = mutable_proto.mutable_external_data();

  const int entry_count = entry_protos->size();
  for (int i = 0; i < entry_count; ++i) {
    auto& string_entry_proto = entry_protos->at(i);
    const auto& pb_key = *string_entry_proto.mutable_key();
    const auto& pb_value = *string_entry_proto.mutable_value();

    if (pb_key == "location") {
      location = pb_value;
    } else if (pb_key == "offset") {
      parse_or_throw(pb_value, "offset", offset);
    } else if (pb_key == "length") {
      parse_or_throw(pb_value, "length", length);
    }
  }
}
530-
531456
std::unique_ptr<ONNX_NAMESPACE::ModelProto>
532457
BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
533458
const onnxruntime::GraphViewer& subgraph,
@@ -604,98 +529,12 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
604529
return model_proto;
605530
} else {
606531
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP QDQ optimization pass is disabled";
607-
608-
// scan ext initializers:
609-
std::unordered_map<std::string, std::pair<size_t, size_t>> external_initializers_offset_and_length;
610-
std::string tempLocation;
611-
size_t extInitializerTotalSize = 0;
612-
if (session_context_.has_external_weights) {
613-
auto allInitializers = subgraph.GetAllInitializedTensors();
614-
for (auto& [name, tp] : allInitializers) {
615-
if (utils::HasExternalDataInMemory(*tp)) {
616-
size_t offset = 0;
617-
size_t length = 0;
618-
ReadExternalDataFields(tp, tempLocation, offset, length);
619-
extInitializerTotalSize += length;
620-
external_initializers_offset_and_length[name] = {offset, length};
621-
}
622-
}
623-
}
624-
625-
// when we have external weights in memory, the model proto will actually embed those
626-
// and bloat the serialized string. We can avoid that by not including the data in the proto
627-
// but then we have to update those initializers and set the external_data fields to mem_addr tag...
628-
// proto is limited to 2GB, but let's use 32MB as threshold to be conservative and still gain some memory reductions.
629-
#if (((OPENVINO_VERSION_MAJOR == 2025) && (OPENVINO_VERSION_MINOR > 3)) || (OPENVINO_VERSION_MAJOR > 2025))
630-
constexpr size_t MAX_EMBEDDED_INITIALIZER_SIZE = 1024 * 1024 * 32;
631-
const bool include_initializer_data_in_proto = !(session_context_.has_external_weights &&
632-
external_initializers_offset_and_length.size() > 1 &&
633-
extInitializerTotalSize >= MAX_EMBEDDED_INITIALIZER_SIZE);
634-
#else
635-
const bool include_initializer_data_in_proto = true;
636-
#endif
637-
638-
639532
auto model = subgraph.CreateModel(logger);
640533
auto model_proto = model->ToProto();
641534
model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
642-
subgraph.ToProto(*model_proto->mutable_graph(), /*include_initializers*/true,
643-
/*include_outer_scope_args*/true, /*execution_order*/0, /*include_initializer_data*/include_initializer_data_in_proto);
644-
535+
subgraph.ToProto(*model_proto->mutable_graph(), true, true);
645536
print_model_proto_duration();
646-
647-
if (!include_initializer_data_in_proto) {
648-
LOGS(logger, INFO) << "Initializer data is not included in the model proto. Updating metadata..., total size " << extInitializerTotalSize / (1024 * 1024) << " MB in " << external_initializers_offset_and_length.size() << " initializers";
649-
auto* graph_proto = model_proto->mutable_graph();
650-
auto* proto_initializers = graph_proto->mutable_initializer();
651-
652-
std::unordered_map<std::string, ONNX_NAMESPACE::TensorProto*> proto_initializer_map;
653-
for (int i = 0, n = proto_initializers->size(); i < n; ++i) {
654-
auto& proto_init = proto_initializers->at(i);
655-
proto_initializer_map[proto_init.name()] = &proto_init;
656-
}
657-
658-
for (const auto& [name, src_init] : subgraph.GetAllInitializedTensors()) {
659-
auto it = proto_initializer_map.find(name);
660-
if (it == proto_initializer_map.end())
661-
continue;
662-
663-
auto* proto_init = it->second;
664-
665-
// If the proto initializer is missing data, fill it in
666-
if (!proto_init->has_raw_data() && src_init->has_raw_data()) {
667-
*proto_init->mutable_raw_data() = src_init->raw_data();
668-
}
669-
670-
// Only set in-memory external_data fields if the data is in memory
671-
if (src_init->has_raw_data()) {
672-
LOGS(logger, VERBOSE) << "In-memory initializer RAW: "
673-
<< src_init->name()
674-
<< ", data_type: " << src_init->data_type()
675-
<< ", raw_data size: " << src_init->raw_data().size();
676-
677-
SetExternalDataFields(proto_init, src_init->raw_data().data(), src_init->raw_data().size());
678-
} else if (onnxruntime::utils::HasExternalDataInMemory(*src_init)) {
679-
auto it_ext = external_initializers_offset_and_length.find(name);
680-
if (it_ext == external_initializers_offset_and_length.end()) {
681-
std::ostringstream err_msg;
682-
err_msg << "Initializer marked as external in memory but missing offset/length info: " << src_init->name();
683-
ORT_THROW(err_msg.str());
684-
}
685-
const size_t offset = it_ext->second.first;
686-
const size_t length = it_ext->second.second;
687-
688-
LOGS(logger, VERBOSE) << "In-memory initializer EXT: " << src_init->name() << ", size: " << length;
689-
690-
SetExternalDataFields(proto_init, (const void*)offset, length);
691-
} else {
692-
LOGS(logger, VERBOSE) << "File-based initializer: " << src_init->name() << ", data_type: " << src_init->data_type();
693-
}
694-
}
695-
}
696-
697537
DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node);
698-
699538
return model_proto;
700539
}
701540
}

0 commit comments

Comments
 (0)