@@ -455,44 +455,76 @@ static void DumpOpenVINOEPModel([[maybe_unused]] const std::filesystem::path& on
455
455
}
456
456
457
457
/// Points the external_data metadata of `proto_init` at a buffer that lives in memory.
///
/// The tensor's data_location is set to EXTERNAL, and the external_data key/value entries are
/// updated in place (every entry with a matching key) or appended when no such key exists:
///   - "location": the ORT in-memory marker "*/_ORT_MEM_ADDR_/*"
///   - "offset":   the numeric value of `data_ptr`, formatted as a decimal string
///   - "length":   `data_size`, formatted as a decimal string
/// Missing entries are appended in the order location, offset, length.
///
/// @param proto_init  Tensor proto whose external_data entries are rewritten.
/// @param data_ptr    Address to record; only the address is stored, no data is copied here.
/// @param data_size   Value to record in the "length" entry.
static void SetExternalDataFields(ONNX_NAMESPACE::TensorProto* proto_init, const void* data_ptr, int64_t data_size) {
  static constexpr const char* ORT_INTERNAL_MEM_INITIALIZER = "*/_ORT_MEM_ADDR_/*";

  auto* external_data = proto_init->mutable_external_data();
  // Only the entries present on entry are scanned; anything appended below is already correct.
  const int ext_data_size = external_data->size();
  proto_init->set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation::TensorProto_DataLocation_EXTERNAL);

  // Format both values once; they are used either for an in-place update or for an append.
  const std::string address_text = std::to_string(reinterpret_cast<uintptr_t>(data_ptr));
  const std::string length_text = std::to_string(data_size);

  bool found_location = false;
  bool found_offset = false;
  bool found_length = false;

  for (int j = 0; j < ext_data_size; ++j) {
    auto& ext_entry = external_data->at(j);
    const auto& key = *ext_entry.mutable_key();
    if (key == "location") {
      *ext_entry.mutable_value() = ORT_INTERNAL_MEM_INITIALIZER;
      found_location = true;
    } else if (key == "offset") {
      *ext_entry.mutable_value() = address_text;
      found_offset = true;
    } else if (key == "length") {
      *ext_entry.mutable_value() = length_text;
      found_length = true;
    }
  }

  // Appends a new key/value entry at the end of external_data.
  const auto append_entry = [external_data](const char* key, const std::string& value) {
    auto* new_entry = external_data->Add();
    *new_entry->mutable_key() = key;
    *new_entry->mutable_value() = value;
  };

  if (!found_location) {
    append_entry("location", ORT_INTERNAL_MEM_INITIALIZER);
  }
  if (!found_offset) {
    append_entry("offset", address_text);
  }
  if (!found_length) {
    append_entry("length", length_text);
  }
}
495
495
496
+ static void ReadExternalDataFields (const ONNX_NAMESPACE::TensorProto* src_init, std::string& location, size_t & offset, size_t & length) {
497
+ // Remove constness as we need to use mutable_external_data() to get the entries to read.
498
+ // The entries themselves are not modified...
499
+ auto & mutable_proto = *const_cast <ONNX_NAMESPACE::TensorProto*>(src_init);
500
+ auto * entry_protos = mutable_proto.mutable_external_data ();
501
+ for (int i = 0 ; i < entry_protos->size (); i++) {
502
+ auto & string_entry_proto{entry_protos->at (i)};
503
+ const auto & pb_key{*(string_entry_proto.mutable_key ())};
504
+ const auto & pb_value{*(string_entry_proto.mutable_value ())};
505
+ if (pb_key == " location" ) {
506
+ location = pb_value;
507
+ } else if (pb_key == " offset" ) {
508
+ const auto res = std::from_chars (pb_value.data (), pb_value.data () + pb_value.size (), offset);
509
+ if (res.ec != std::errc ()) {
510
+ std::ostringstream err_msg;
511
+ err_msg << " External data in memory has invalid offset field: "
512
+ << src_init->name () << " ], location: " << location
513
+ << " , offset: " << pb_value;
514
+ ORT_THROW (err_msg.str ());
515
+ }
516
+ } else if (pb_key == " length" ) {
517
+ const auto res = std::from_chars (pb_value.data (), pb_value.data () + pb_value.size (), length);
518
+ if (res.ec != std::errc ()) {
519
+ std::ostringstream err_msg;
520
+ err_msg << " External data in memory has invalid length field: "
521
+ << src_init->name () << " ], location: " << location
522
+ << " , length: " << pb_value;
523
+ ORT_THROW (err_msg.str ());
524
+ }
525
+ }
526
+ }
527
+ }
496
528
497
529
std::unique_ptr<ONNX_NAMESPACE::ModelProto>
498
530
BackendManager::GetModelProtoFromFusedNode (const onnxruntime::Node& fused_node,
@@ -571,37 +603,33 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
571
603
} else {
572
604
LOGS_DEFAULT (INFO) << " [OpenVINO-EP] OVEP QDQ optimization pass is disabled" ;
573
605
574
- static bool load_user_initializer_ = true ;
575
- size_t userWeightsFromRawData = 0 ;
576
- size_t userWeightsFromExternalDataInMemory = 0 ;
577
- size_t allInitializersCount = 0 ;
578
- if (load_user_initializer_) {
579
- auto allInitializers = subgraph.GetAllInitializedTensors ();
580
- allInitializersCount = allInitializers.size ();
581
-
582
- for (auto & entry : allInitializers) {
583
- auto * tp = entry.second ;
584
- if (tp->has_raw_data ()) {
585
- userWeightsFromRawData++;
586
- } else if (utils::HasExternalDataInMemory (*tp)) {
587
- userWeightsFromExternalDataInMemory++;
606
+ const size_t extInitializerCount = [&subgraph, cnt = 0ull ]() mutable {
607
+ auto allInitializers = subgraph.GetAllInitializedTensors ();
608
+ for (auto & [name, tp] : allInitializers) {
609
+ if (utils::HasExternalDataInMemory (*tp)) {
610
+ ++cnt;
588
611
}
589
612
}
590
- }
591
- LOGS_DEFAULT (INFO) << " [OpenVINO-EP] Loaded " << allInitializersCount << " initializers from the model. "
592
- << userWeightsFromRawData << " from raw_data, "
593
- << userWeightsFromExternalDataInMemory << " from external_data." ;
613
+ return cnt;
614
+ }();
615
+
616
+ // when we have external weights in memory, the model proto will actually embed those
617
+ // and bloat the serialized string. We can avoid that by not including the data in the proto
618
+ // but then we have to update those initializers and set the external_data fields to mem_addr tag...
619
+ // 1 is arbitrary number, but if we have more than 1 external initializer, then the savings are worth the effort
620
+ const bool include_initializer_data_in_proto = !(session_context_.has_external_weights == true && extInitializerCount > 1 );
594
621
595
622
auto model = subgraph.CreateModel (logger);
596
623
auto model_proto = model->ToProto ();
597
624
model_proto->set_ir_version (ONNX_NAMESPACE::Version::IR_VERSION);
598
- subgraph.ToProto (*model_proto->mutable_graph (), /* include_initializers*/ true , /* include_outer_scope_args*/ true , /* execution order*/ 0 , /* include_initializer_data*/ !load_user_initializer_);
625
+ subgraph.ToProto (*model_proto->mutable_graph (), /* include_initializers*/ true ,
626
+ /* include_outer_scope_args*/ true , /* execution_order*/ 0 , /* include_initializer_data*/ include_initializer_data_in_proto);
599
627
600
628
print_model_proto_duration ();
601
629
DumpOpenVINOEPModel (onnx_model_path_name, model_proto.get (), fused_node);
602
630
603
631
// new code:
604
- if (load_user_initializer_ )
632
+ if (!include_initializer_data_in_proto )
605
633
{
606
634
LOGS (logger, INFO) << " Initializer data is not included in the model proto. Updating metadata..." ;
607
635
const auto & allInitializers = subgraph.GetAllInitializedTensors ();
@@ -632,63 +660,24 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
632
660
633
661
// Only set in-memory external_data fields if the data is in memory
634
662
if (src_init->has_raw_data ()) {
635
- // Debug info for in-memory initializers
636
663
LOGS (logger, VERBOSE) << " In-memory initializer RAW: "
637
664
<< src_init->name ()
638
665
<< " , data_type: " << src_init->data_type ()
639
666
<< " , raw_data size: " << src_init->raw_data ().size ();
640
667
641
668
SetExternalDataFields (proto_init, src_init->raw_data ().data (), src_init->raw_data ().size ());
642
669
}
643
- else if (onnxruntime::utils::HasExternalDataInMemory (*src_init)) {
644
-
645
- using mutable_proto_t = ONNX_NAMESPACE::TensorProto*;
646
- auto & mutable_proto = *const_cast <mutable_proto_t >(src_init);
647
- auto * entry_protos = mutable_proto.mutable_external_data ();
670
+ else if (onnxruntime::utils::HasExternalDataInMemory (*src_init)) {
648
671
std::string location;
649
672
size_t offset = 0 ;
650
673
size_t length = 0 ;
651
- for (int i = 0 ; i < entry_protos->size (); i++) {
652
- auto & string_entry_proto{ entry_protos->at (i) };
653
- const auto & pb_key{ *(string_entry_proto.mutable_key ()) };
654
- const auto & pb_value{ *(string_entry_proto.mutable_value ()) };
655
- if (pb_key == " location" ) {
656
- location = pb_value;
657
- }
658
- else if (pb_key == " offset" ) {
659
- const auto res = std::from_chars (pb_value.data (), pb_value.data () + pb_value.size (), offset);
660
- if (res.ec != std::errc ()) {
661
- LOGS (logger, ERROR) << " External data in memory has invalid offset field: "
662
- << src_init->name () << " ], location: " << location
663
- << " , offset: " << pb_value;
664
- offset = 0 ;
665
- }
666
- }
667
- else if (pb_key == " length" ) {
668
- const auto res = std::from_chars (pb_value.data (), pb_value.data () + pb_value.size (), length);
669
- if (res.ec != std::errc ()) {
670
- LOGS (logger, ERROR) << " External data in memory has invalid length field: "
671
- << src_init->name () << " ], location: " << location
672
- << " , length: " << pb_value;
673
- offset = 0 ;
674
- }
675
- }
676
- }
677
- if (offset == 0 || length == 0 ) {
678
- LOGS (logger, ERROR) << " External data in memory has invalid external_data fields: "
679
- << src_init->name () << " ], location: " << location
680
- << " , offset: " << offset
681
- << " , length: " << length;
682
- }
683
- else
684
- {
685
- // we have data in it, so populate the proto_init
686
- LOGS (logger, VERBOSE) << " In-memory initializer EXT: "
687
- << src_init->name ()
688
- << " , size: " << length;
689
-
690
- SetExternalDataFields (proto_init, (const void *)offset, length);
691
- }
674
+ ReadExternalDataFields (src_init, location, offset, length);
675
+
676
+ LOGS (logger, VERBOSE) << " In-memory initializer EXT: "
677
+ << src_init->name ()
678
+ << " , size: " << length;
679
+
680
+ SetExternalDataFields (proto_init, (const void *)offset, length);
692
681
}
693
682
else {
694
683
// Debug info for file-based initializers
@@ -838,10 +827,7 @@ void BackendManager::Compute(OrtKernelContext* context) {
838
827
839
828
{
840
829
std::unique_lock<std::mutex> lock (mutex_);
841
- auto it = backend_map_.find (key);
842
- if (it != backend_map_.end ()) {
843
- dynamic_backend = it->second ;
844
- }
830
+ dynamic_backend = backend_map_[key];
845
831
}
846
832
847
833
if (!dynamic_backend) {
0 commit comments