@@ -20,11 +20,11 @@ using Exception = ov::Exception;
 namespace onnxruntime {
 namespace openvino_ep {
 
-SharedContext::SharedWeights::WeightsFile::WeightsFile(std::filesystem::path filename) : file_(filename, std::ios::in | std::ios::binary) {
+SharedContext::SharedWeights::WeightsFile::WeightsFile(std::filesystem::path filename) : file_(filename, std::ios::in | std::ios::binary), file_path_(filename) {
   try {
     file_.exceptions(std::ifstream::failbit | std::ifstream::badbit);
-    weights_size_ = file_.seekg(0, std::ios::end).tellg();
-  } catch (std::ifstream::failure& e) {
+    weights_size_ = std::filesystem::file_size(filename);
+  } catch (const std::exception& e) {
     ORT_THROW("Error: Failed to open weight file at ", filename.string(), " ", e.what());
   }
 }
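Note on the hunk above: `std::filesystem::file_size` asks the filesystem for the size instead of seeking the stream to its end, so the stream's read position is left untouched and failures surface through the broadened `const std::exception&` handler. A minimal standalone sketch contrasting the two idioms (not part of the patch; `weights.bin` is a hypothetical file created here just so the example runs end to end):

```cpp
#include <filesystem>
#include <fstream>
#include <iostream>

int main() {
  const std::filesystem::path path{"weights.bin"};
  std::ofstream(path, std::ios::binary) << "0123456789";  // 10-byte file

  // Old idiom: seek the stream to the end and read back the position.
  // This mutates the stream's read position and conflates "file size"
  // with "stream offset"; the stream must be rewound before any reads.
  std::ifstream file(path, std::ios::in | std::ios::binary);
  const auto via_stream = file.seekg(0, std::ios::end).tellg();

  // New idiom: query the filesystem directly. The stream state is
  // untouched, and errors are reported via std::filesystem::filesystem_error
  // (or the std::error_code overload used here).
  std::error_code ec;
  const auto via_fs = std::filesystem::file_size(path, ec);

  std::cout << via_stream << " == " << (ec ? 0 : via_fs) << '\n';  // 10 == 10
}
```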
@@ -35,6 +35,32 @@ void SharedContext::SharedWeights::WeightsFile::load_weights(size_t file_offset,
   file_.read(reinterpret_cast<char*>(data), size);
 }
 
+void* SharedContext::SharedWeights::WeightsFile::TryGetOrCreateDeviceMapping(std::optional<ov::RemoteContext>& remote_context) {
+  std::string dev_name{};
+  if (remote_context) {
+    dev_name = remote_context->get_device_name();
+  }
+
+  auto [it, inserted] = imported_device_tensors_.emplace(dev_name, MappingContainer{});
+  if (inserted) {
+    if (dev_name == "NPU") {
+#if OPENVINO_VERSION_AT_LEAST(2025, 3)
+      // Try to import the memory-mapped file as a remote tensor
+      ORT_ENFORCE(remote_context, "Error: Remote context is required for NPU device.");
+      auto npu_context = remote_context->as<ov::intel_npu::level_zero::ZeroContext>();
+      auto&& l0_tensor = npu_context.create_tensor(ov::element::Type_t::u8, {weights_size_}, ov::intel_npu::FileDescriptor(file_path_));
+      it->second = MappingContainer{.ptr_ = l0_tensor.get(), .tensor_ = l0_tensor};
+#endif
+    } else if (dev_name.empty()) {
+      // CPU/virtual device case: create a CPU tensor memory-mapped from the file
+      auto&& mmaped_tensor = ov::read_tensor_data(file_path_);
+      it->second = MappingContainer{.ptr_ = mmaped_tensor.data(), .tensor_ = mmaped_tensor};
+    }
+  }
+
+  return it->second.ptr_;
+}
+
 std::ostream& operator<<(std::ostream& stream, const SharedContext::SharedWeights::Metadata::Map& metadata) {
   try {
     stream << metadata.size();
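`TryGetOrCreateDeviceMapping` memoizes one mapping per device name: `emplace` inserts a default `MappingContainer` only on the first call for a given key, so the expensive import runs once and later calls return the cached pointer. Devices with no mapping path keep `ptr_ == nullptr`, which the caller treats as "fall back to copying". A minimal sketch of that pattern, assuming a map-like container; the names `cache` and `get_or_create` are illustrative, not from the patch:

```cpp
#include <iostream>
#include <map>
#include <string>

struct MappingContainer {
  void* ptr_ = nullptr;  // nullptr signals "no mapping for this device"
};

std::map<std::string, MappingContainer> cache;

void* get_or_create(const std::string& dev_name) {
  // emplace is a no-op (inserted == false) when the key already exists,
  // so the creation branch below runs at most once per device name.
  auto [it, inserted] = cache.emplace(dev_name, MappingContainer{});
  if (inserted) {
    if (dev_name.empty()) {
      // Stand-in for the CPU path that memory-maps the weights file.
      static char host_buffer[16];
      it->second.ptr_ = host_buffer;
    }
    // Unrecognized devices keep ptr_ == nullptr, which callers treat
    // as "copy through a host tensor instead".
  }
  return it->second.ptr_;
}

int main() {
  std::cout << (get_or_create("") != nullptr) << '\n';     // 1: CPU mapping
  std::cout << (get_or_create("GPU") != nullptr) << '\n';  // 0: fallback
}
```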
@@ -405,29 +431,43 @@ ov::element::Type GetOpenVINOElementType(ONNX_NAMESPACE::TensorProto_DataType dt
 void CreateOVTensors(const std::string& device_name,
                      SharedContext::SharedWeights::Metadata::Map& metadata_map,
                      SharedContext::SharedWeights::WeightsFile& weights) {
+  // Get remote context if available
+  std::optional<ov::RemoteContext> opt_remote_ctx;
+  try {
+    opt_remote_ctx = OVCore::Get()->core.get_default_context(device_name);
+  } catch (const std::exception&) {
+    // Remote context not available
+  }
+
   for (auto& [key, value] : metadata_map) {
     if (value.tensor) continue;
 
     // Get element data type
     auto onnx_element_type = (ONNX_NAMESPACE::TensorProto_DataType)value.element_type;
-
-    ov::element::Type ov_elementType = GetOpenVINOElementType(onnx_element_type);  // Map to OpenVINO data type
-
-    // Create OpenVINO Tensor
-    if (device_name == "NPU") {
-      // Use remote tensors
-      auto npu_context = OVCore::Get()->core.get_default_context("NPU").as<ov::intel_npu::level_zero::ZeroContext>();
-      auto&& remote_tensor = npu_context.create_l0_host_tensor(ov_elementType, value.dimensions, ov::intel_npu::TensorType::INPUT);
-
-      // Copy data to remote tensor
-      weights.load_weights(value.data_offset, remote_tensor.get(), value.size);
-      value.tensor = std::make_shared<ov::Tensor>(remote_tensor);
+    ov::element::Type ov_elementType = GetOpenVINOElementType(onnx_element_type);
+
+    // Try to get memory-mapped weights
+    ov::Tensor tensor;
+    uint8_t* mmaped_weights = static_cast<uint8_t*>(weights.TryGetOrCreateDeviceMapping(opt_remote_ctx));
+
+    if (mmaped_weights) {
+      // We have memory-mapped weights. Create a Tensor view into them for this value.
+      ORT_ENFORCE(value.data_offset < weights.Size() &&
+                      value.size <= weights.Size() &&
+                      (value.data_offset <= weights.Size() - value.size),
+                  "File offset + size outside of external initializer file");
+      void* mmapped_offset = static_cast<void*>(mmaped_weights + value.data_offset);
+      tensor = ov::Tensor(ov_elementType, value.dimensions, mmapped_offset);
     } else {
-      // Use vanilla tensors
-      value.tensor = std::make_shared<ov::Tensor>(ov_elementType, value.dimensions);
-      weights.load_weights(value.data_offset, value.tensor->data(), value.size);
+      ORT_ENFORCE(opt_remote_ctx, "Expected either memory-mapped weights or a valid remote context, but neither is available for device: ", device_name);
+      // Can't map the file to a device tensor; create a host tensor and copy the data
+      tensor = opt_remote_ctx->create_host_tensor(ov_elementType, value.dimensions);
+      ORT_ENFORCE(tensor.get_byte_size() == value.size, "Remote tensor size mismatch");
+      weights.load_weights(value.data_offset, tensor.data(), value.size);
     }
-    ORT_ENFORCE(value.tensor->get_byte_size() == value.size, "Unexpected tensor size mismatch");
+
+    ORT_ENFORCE(tensor.get_byte_size() == value.size, "Unexpected tensor size mismatch");
+    value.tensor = std::make_shared<ov::Tensor>(std::move(tensor));
   }
 }
 
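The bounds check in `CreateOVTensors` is ordered to be overflow-safe: verifying `value.size <= weights.Size()` before evaluating `value.data_offset <= weights.Size() - value.size` keeps the unsigned subtraction from wrapping, a guarantee the naive `offset + size <= total` form does not give. A small self-contained sketch of the same check (names here are illustrative):

```cpp
#include <cstdint>
#include <iostream>

// Returns true iff [offset, offset + size) lies inside a file of `total` bytes,
// without any intermediate expression overflowing.
bool range_in_file(uint64_t offset, uint64_t size, uint64_t total) {
  return offset < total && size <= total && offset <= total - size;
}

int main() {
  const uint64_t total = 1024;
  std::cout << range_in_file(0, 1024, total) << '\n';    // 1: whole file
  std::cout << range_in_file(1000, 100, total) << '\n';  // 0: runs past the end
  // The naive check would wrap: 100 + UINT64_MAX overflows to 99 <= 1024.
  std::cout << range_in_file(100, UINT64_MAX, total) << '\n';  // 0
}
```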