Skip to content

Commit 7907cd7

Browse files
committed
Updated the condition for the new logic to be based on the total size of external initializers; also updated comments and refactored the code
Signed-off-by: bfilipek <[email protected]>
1 parent 7aa43d9 commit 7907cd7

File tree

1 file changed

+65
-65
lines changed

1 file changed

+65
-65
lines changed

onnxruntime/core/providers/openvino/backend_manager.cc

Lines changed: 65 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -454,8 +454,10 @@ static void DumpOpenVINOEPModel([[maybe_unused]] const std::filesystem::path& on
454454
#endif
455455
}
456456

457+
// Helper function that sets the external data fields. It duplicates ExternalDataInfo::SetExternalLocationToProto,
458+
// but we cannot use that function because it is not part of the public provider API.
457459
static void SetExternalDataFields(ONNX_NAMESPACE::TensorProto* proto_init, const void* data_ptr, int64_t data_size) {
458-
static constexpr const char* ORT_INTERNAL_MEM_INITIALIZER = "*/_ORT_MEM_ADDR_/*";
460+
static constexpr const char* ORT_INTERNAL_MEM_INITIALIZER = "*/_ORT_MEM_ADDR_/*";
459461
auto* external_data = proto_init->mutable_external_data();
460462
bool found_location = false, found_offset = false, found_length = false;
461463
const int ext_data_size = external_data->size();
@@ -494,7 +496,7 @@ static void SetExternalDataFields(ONNX_NAMESPACE::TensorProto* proto_init, const
494496
}
495497

496498
static void ReadExternalDataFields(const ONNX_NAMESPACE::TensorProto* src_init, std::string& location, size_t& offset, size_t& length) {
497-
// Remove constness as we need to use mutable_external_data() to get the entries to read.
499+
// Remove constness as we need to use mutable_external_data() to get the entries to read.
498500
// The entries themselves are not modified...
499501
auto& mutable_proto = *const_cast<ONNX_NAMESPACE::TensorProto*>(src_init);
500502
auto* entry_protos = mutable_proto.mutable_external_data();
@@ -603,21 +605,31 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
603605
} else {
604606
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP QDQ optimization pass is disabled";
605607

606-
const size_t extInitializerCount = [&subgraph, cnt = 0ull]() mutable {
608+
// scan ext initializers:
609+
std::unordered_map<std::string, std::pair<size_t, size_t>> external_initializers_offset_and_length;
610+
std::string tempLocation;
611+
size_t extInitializerTotalSize = 0;
612+
if (session_context_.has_external_weights) {
607613
auto allInitializers = subgraph.GetAllInitializedTensors();
608614
for (auto& [name, tp] : allInitializers) {
609615
if (utils::HasExternalDataInMemory(*tp)) {
610-
++cnt;
616+
size_t offset = 0;
617+
size_t length = 0;
618+
ReadExternalDataFields(tp, tempLocation, offset, length);
619+
extInitializerTotalSize += length;
620+
external_initializers_offset_and_length[name] = {offset, length};
611621
}
612-
}
613-
return cnt;
614-
}();
622+
}
623+
}
615624

616625
// when we have external weights in memory, the model proto will actually embed those
617626
// and bloat the serialized string. We can avoid that by not including the data in the proto
618627
// but then we have to update those initializers and set the external_data fields to mem_addr tag...
619-
// 1 is arbitrary number, but if we have more than 1 external initializer, then the savings are worth the effort
620-
const bool include_initializer_data_in_proto = !(session_context_.has_external_weights == true && extInitializerCount > 1);
628+
// A proto is limited to 2GB, but we use 512MB as the threshold to be conservative while still gaining some memory savings.
629+
constexpr size_t MAX_EMBEDDED_INITIALIZER_SIZE = 1024 * 1024 * 512;
630+
const bool include_initializer_data_in_proto = !(session_context_.has_external_weights == true &&
631+
external_initializers_offset_and_length.size() > 1 &&
632+
extInitializerTotalSize > MAX_EMBEDDED_INITIALIZER_SIZE);
621633

622634
auto model = subgraph.CreateModel(logger);
623635
auto model_proto = model->ToProto();
@@ -628,66 +640,54 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
628640
print_model_proto_duration();
629641
DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node);
630642

631-
// new code:
632-
if (!include_initializer_data_in_proto)
633-
{
634-
LOGS(logger, INFO) << "Initializer data is not included in the model proto. Updating metadata...";
635-
const auto& allInitializers = subgraph.GetAllInitializedTensors();
636-
auto* graph_proto = model_proto->mutable_graph();
637-
auto* proto_initializers = graph_proto->mutable_initializer();
638-
639-
// Build a map for quick lookup by name
640-
std::unordered_map<std::string, ONNX_NAMESPACE::TensorProto*> proto_initializer_map;
641-
for (int i = 0, n = proto_initializers->size(); i < n; ++i) {
642-
auto& proto_init = proto_initializers->at(i);
643-
proto_initializer_map[proto_init.name()] = &proto_init;
644-
}
643+
if (!include_initializer_data_in_proto) {
644+
LOGS(logger, INFO) << "Initializer data is not included in the model proto. Updating metadata..., total size " << extInitializerTotalSize / (1024 * 1024) << " MB in " << external_initializers_offset_and_length.size() << " initializers";
645+
auto* graph_proto = model_proto->mutable_graph();
646+
auto* proto_initializers = graph_proto->mutable_initializer();
647+
648+
std::unordered_map<std::string, ONNX_NAMESPACE::TensorProto*> proto_initializer_map;
649+
for (int i = 0, n = proto_initializers->size(); i < n; ++i) {
650+
auto& proto_init = proto_initializers->at(i);
651+
proto_initializer_map[proto_init.name()] = &proto_init;
652+
}
653+
654+
for (const auto& [name, src_init] : subgraph.GetAllInitializedTensors()) {
655+
auto it = proto_initializer_map.find(name);
656+
if (it == proto_initializer_map.end())
657+
continue;
645658

646-
for (const auto& init_entry : allInitializers) {
647-
const std::string& name = init_entry.first;
648-
const ONNX_NAMESPACE::TensorProto* src_init = init_entry.second;
649-
650-
auto it = proto_initializer_map.find(name);
651-
if (it == proto_initializer_map.end())
652-
continue;
653-
654-
auto* proto_init = it->second;
655-
656-
// If the proto initializer is missing data, fill it in
657-
if (!proto_init->has_raw_data() && src_init->has_raw_data()) {
658-
*proto_init->mutable_raw_data() = src_init->raw_data();
659-
}
660-
661-
// Only set in-memory external_data fields if the data is in memory
662-
if (src_init->has_raw_data()) {
663-
LOGS(logger, VERBOSE) << "In-memory initializer RAW: "
664-
<< src_init->name()
665-
<< ", data_type: " << src_init->data_type()
666-
<< ", raw_data size: " << src_init->raw_data().size();
667-
668-
SetExternalDataFields(proto_init, src_init->raw_data().data(), src_init->raw_data().size());
669-
}
670-
else if (onnxruntime::utils::HasExternalDataInMemory(*src_init)) {
671-
std::string location;
672-
size_t offset = 0;
673-
size_t length = 0;
674-
ReadExternalDataFields(src_init, location, offset, length);
675-
676-
LOGS(logger, VERBOSE) << "In-memory initializer EXT: "
677-
<< src_init->name()
678-
<< ", size: " << length;
679-
680-
SetExternalDataFields(proto_init, (const void*)offset, length);
681-
}
682-
else {
683-
// Debug info for file-based initializers
684-
LOGS(logger, VERBOSE)<< "File-based initializer: "
685-
<< src_init->name()
686-
<< ", data_type: " << src_init->data_type();
687-
}
659+
auto* proto_init = it->second;
688660

661+
// If the proto initializer is missing data, fill it in
662+
if (!proto_init->has_raw_data() && src_init->has_raw_data()) {
663+
*proto_init->mutable_raw_data() = src_init->raw_data();
689664
}
690665

666+
// Only set in-memory external_data fields if the data is in memory
667+
if (src_init->has_raw_data()) {
668+
LOGS(logger, VERBOSE) << "In-memory initializer RAW: "
669+
<< src_init->name()
670+
<< ", data_type: " << src_init->data_type()
671+
<< ", raw_data size: " << src_init->raw_data().size();
672+
673+
SetExternalDataFields(proto_init, src_init->raw_data().data(), src_init->raw_data().size());
674+
} else if (onnxruntime::utils::HasExternalDataInMemory(*src_init)) {
675+
auto it_ext = external_initializers_offset_and_length.find(name);
676+
if (it_ext == external_initializers_offset_and_length.end()) {
677+
std::ostringstream err_msg;
678+
err_msg << "Initializer marked as external in memory but missing offset/length info: " << src_init->name();
679+
ORT_THROW(err_msg.str());
680+
}
681+
const size_t offset = it_ext->second.first;
682+
const size_t length = it_ext->second.second;
683+
684+
LOGS(logger, VERBOSE) << "In-memory initializer EXT: " << src_init->name() << ", size: " << length;
685+
686+
SetExternalDataFields(proto_init, (const void*)offset, length);
687+
} else {
688+
LOGS(logger, VERBOSE) << "File-based initializer: " << src_init->name() << ", data_type: " << src_init->data_type();
689+
}
690+
}
691691
}
692692

693693
return model_proto;

0 commit comments

Comments
 (0)