@@ -454,8 +454,10 @@ static void DumpOpenVINOEPModel([[maybe_unused]] const std::filesystem::path& on
454
454
#endif
455
455
}
456
456
457
+ // Helper that sets the external_data fields of an initializer. It duplicates ExternalDataInfo::SetExternalLocationToProto,
458
+ // which cannot be used here because it is not part of the public provider API.
457
459
static void SetExternalDataFields (ONNX_NAMESPACE::TensorProto* proto_init, const void * data_ptr, int64_t data_size) {
458
- static constexpr const char * ORT_INTERNAL_MEM_INITIALIZER = " */_ORT_MEM_ADDR_/*" ;
460
+ static constexpr const char * ORT_INTERNAL_MEM_INITIALIZER = " */_ORT_MEM_ADDR_/*" ;
459
461
auto * external_data = proto_init->mutable_external_data ();
460
462
bool found_location = false , found_offset = false , found_length = false ;
461
463
const int ext_data_size = external_data->size ();
@@ -494,7 +496,7 @@ static void SetExternalDataFields(ONNX_NAMESPACE::TensorProto* proto_init, const
494
496
}
495
497
496
498
static void ReadExternalDataFields (const ONNX_NAMESPACE::TensorProto* src_init, std::string& location, size_t & offset, size_t & length) {
497
- // Remove constness as we need to use mutable_external_data() to get the entries to read.
499
+ // Remove constness as we need to use mutable_external_data() to get the entries to read.
498
500
// The entries themselves are not modified...
499
501
auto & mutable_proto = *const_cast <ONNX_NAMESPACE::TensorProto*>(src_init);
500
502
auto * entry_protos = mutable_proto.mutable_external_data ();
@@ -603,21 +605,31 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
603
605
} else {
604
606
LOGS_DEFAULT (INFO) << " [OpenVINO-EP] OVEP QDQ optimization pass is disabled" ;
605
607
606
- const size_t extInitializerCount = [&subgraph, cnt = 0ull ]() mutable {
608
+ // scan ext initializers:
609
+ std::unordered_map<std::string, std::pair<size_t , size_t >> external_initializers_offset_and_length;
610
+ std::string tempLocation;
611
+ size_t extInitializerTotalSize = 0 ;
612
+ if (session_context_.has_external_weights ) {
607
613
auto allInitializers = subgraph.GetAllInitializedTensors ();
608
614
for (auto & [name, tp] : allInitializers) {
609
615
if (utils::HasExternalDataInMemory (*tp)) {
610
- ++cnt;
616
+ size_t offset = 0 ;
617
+ size_t length = 0 ;
618
+ ReadExternalDataFields (tp, tempLocation, offset, length);
619
+ extInitializerTotalSize += length;
620
+ external_initializers_offset_and_length[name] = {offset, length};
611
621
}
612
- }
613
- return cnt;
614
- }();
622
+ }
623
+ }
615
624
616
625
// when we have external weights in memory, the model proto will actually embed those
617
626
// and bloat the serialized string. We can avoid that by not including the data in the proto
618
627
// but then we have to update those initializers and set the external_data fields to mem_addr tag...
619
- // 1 is arbitrary number, but if we have more than 1 external initializer, then the savings are worth the effort
620
- const bool include_initializer_data_in_proto = !(session_context_.has_external_weights == true && extInitializerCount > 1 );
628
+ // proto is limited to 2GB, but let's use 512MB as threshold to be conservative and still gain some memory reductions.
629
+ constexpr size_t MAX_EMBEDDED_INITIALIZER_SIZE = 1024 * 1024 * 512 ;
630
+ const bool include_initializer_data_in_proto = !(session_context_.has_external_weights == true &&
631
+ external_initializers_offset_and_length.size () > 1 &&
632
+ extInitializerTotalSize > MAX_EMBEDDED_INITIALIZER_SIZE);
621
633
622
634
auto model = subgraph.CreateModel (logger);
623
635
auto model_proto = model->ToProto ();
@@ -628,66 +640,54 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
628
640
print_model_proto_duration ();
629
641
DumpOpenVINOEPModel (onnx_model_path_name, model_proto.get (), fused_node);
630
642
631
- // new code:
632
- if (!include_initializer_data_in_proto)
633
- {
634
- LOGS (logger, INFO) << " Initializer data is not included in the model proto. Updating metadata..." ;
635
- const auto & allInitializers = subgraph.GetAllInitializedTensors ();
636
- auto * graph_proto = model_proto->mutable_graph ();
637
- auto * proto_initializers = graph_proto->mutable_initializer ();
638
-
639
- // Build a map for quick lookup by name
640
- std::unordered_map<std::string, ONNX_NAMESPACE::TensorProto*> proto_initializer_map;
641
- for (int i = 0 , n = proto_initializers->size (); i < n; ++i) {
642
- auto & proto_init = proto_initializers->at (i);
643
- proto_initializer_map[proto_init.name ()] = &proto_init;
644
- }
643
+ if (!include_initializer_data_in_proto) {
644
+ LOGS (logger, INFO) << " Initializer data is not included in the model proto. Updating metadata..., total size " << extInitializerTotalSize / (1024 * 1024 ) << " MB in " << external_initializers_offset_and_length.size () << " initializers" ;
645
+ auto * graph_proto = model_proto->mutable_graph ();
646
+ auto * proto_initializers = graph_proto->mutable_initializer ();
647
+
648
+ std::unordered_map<std::string, ONNX_NAMESPACE::TensorProto*> proto_initializer_map;
649
+ for (int i = 0 , n = proto_initializers->size (); i < n; ++i) {
650
+ auto & proto_init = proto_initializers->at (i);
651
+ proto_initializer_map[proto_init.name ()] = &proto_init;
652
+ }
653
+
654
+ for (const auto & [name, src_init] : subgraph.GetAllInitializedTensors ()) {
655
+ auto it = proto_initializer_map.find (name);
656
+ if (it == proto_initializer_map.end ())
657
+ continue ;
645
658
646
- for (const auto & init_entry : allInitializers) {
647
- const std::string& name = init_entry.first ;
648
- const ONNX_NAMESPACE::TensorProto* src_init = init_entry.second ;
649
-
650
- auto it = proto_initializer_map.find (name);
651
- if (it == proto_initializer_map.end ())
652
- continue ;
653
-
654
- auto * proto_init = it->second ;
655
-
656
- // If the proto initializer is missing data, fill it in
657
- if (!proto_init->has_raw_data () && src_init->has_raw_data ()) {
658
- *proto_init->mutable_raw_data () = src_init->raw_data ();
659
- }
660
-
661
- // Only set in-memory external_data fields if the data is in memory
662
- if (src_init->has_raw_data ()) {
663
- LOGS (logger, VERBOSE) << " In-memory initializer RAW: "
664
- << src_init->name ()
665
- << " , data_type: " << src_init->data_type ()
666
- << " , raw_data size: " << src_init->raw_data ().size ();
667
-
668
- SetExternalDataFields (proto_init, src_init->raw_data ().data (), src_init->raw_data ().size ());
669
- }
670
- else if (onnxruntime::utils::HasExternalDataInMemory (*src_init)) {
671
- std::string location;
672
- size_t offset = 0 ;
673
- size_t length = 0 ;
674
- ReadExternalDataFields (src_init, location, offset, length);
675
-
676
- LOGS (logger, VERBOSE) << " In-memory initializer EXT: "
677
- << src_init->name ()
678
- << " , size: " << length;
679
-
680
- SetExternalDataFields (proto_init, (const void *)offset, length);
681
- }
682
- else {
683
- // Debug info for file-based initializers
684
- LOGS (logger, VERBOSE)<< " File-based initializer: "
685
- << src_init->name ()
686
- << " , data_type: " << src_init->data_type ();
687
- }
659
+ auto * proto_init = it->second ;
688
660
661
+ // If the proto initializer is missing data, fill it in
662
+ if (!proto_init->has_raw_data () && src_init->has_raw_data ()) {
663
+ *proto_init->mutable_raw_data () = src_init->raw_data ();
689
664
}
690
665
666
+ // Only set in-memory external_data fields if the data is in memory
667
+ if (src_init->has_raw_data ()) {
668
+ LOGS (logger, VERBOSE) << " In-memory initializer RAW: "
669
+ << src_init->name ()
670
+ << " , data_type: " << src_init->data_type ()
671
+ << " , raw_data size: " << src_init->raw_data ().size ();
672
+
673
+ SetExternalDataFields (proto_init, src_init->raw_data ().data (), src_init->raw_data ().size ());
674
+ } else if (onnxruntime::utils::HasExternalDataInMemory (*src_init)) {
675
+ auto it_ext = external_initializers_offset_and_length.find (name);
676
+ if (it_ext == external_initializers_offset_and_length.end ()) {
677
+ std::ostringstream err_msg;
678
+ err_msg << " Initializer marked as external in memory but missing offset/length info: " << src_init->name ();
679
+ ORT_THROW (err_msg.str ());
680
+ }
681
+ const size_t offset = it_ext->second .first ;
682
+ const size_t length = it_ext->second .second ;
683
+
684
+ LOGS (logger, VERBOSE) << " In-memory initializer EXT: " << src_init->name () << " , size: " << length;
685
+
686
+ SetExternalDataFields (proto_init, (const void *)offset, length);
687
+ } else {
688
+ LOGS (logger, VERBOSE) << " File-based initializer: " << src_init->name () << " , data_type: " << src_init->data_type ();
689
+ }
690
+ }
691
691
}
692
692
693
693
return model_proto;
0 commit comments