
Commit 397c61b

ericcraw, MayureshV1, and Copilot authored
CVS-174585: Memory map shared weights when possible (#829)
* Memory map shared weights when possible

* Update onnxruntime/core/providers/openvino/backend_utils.cc
Co-authored-by: Copilot <[email protected]>

* Update onnxruntime/core/providers/openvino/backend_utils.cc
Co-authored-by: Copilot <[email protected]>

---------

Co-authored-by: MayureshV1 <[email protected]>
Co-authored-by: Copilot <[email protected]>
1 parent 19ebc1f commit 397c61b
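
In short: instead of reading every shared initializer out of the external weights file into a freshly allocated tensor, the weights file is now mapped once (on CPU via ov::read_tensor_data, on NPU via a Level Zero file-descriptor import when OpenVINO >= 2025.3) and each initializer becomes a zero-copy ov::Tensor view at its file offset. A minimal sketch of that idea follows; the helper name is assumed and this is not the provider code itself:

#include <cstdint>
#include <openvino/openvino.hpp>

// Sketch only: wrap one initializer as a zero-copy view into an already
// memory-mapped weights file (e.g. the result of ov::read_tensor_data(path)).
ov::Tensor weight_view(const ov::Tensor& mapped_file, ov::element::Type type,
                       const ov::Shape& dims, size_t byte_offset) {
  auto* base = static_cast<uint8_t*>(mapped_file.data());
  // An ov::Tensor built over an external pointer neither copies nor owns the data.
  return ov::Tensor(type, dims, static_cast<void*>(base + byte_offset));
}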

File tree: 3 files changed (+78, -19 lines)

onnxruntime/core/providers/openvino/backend_utils.cc

Lines changed: 59 additions & 19 deletions
@@ -20,11 +20,11 @@ using Exception = ov::Exception;
 namespace onnxruntime {
 namespace openvino_ep {
 
-SharedContext::SharedWeights::WeightsFile::WeightsFile(std::filesystem::path filename) : file_(filename, std::ios::in | std::ios::binary) {
+SharedContext::SharedWeights::WeightsFile::WeightsFile(std::filesystem::path filename) : file_(filename, std::ios::in | std::ios::binary), file_path_(filename) {
   try {
     file_.exceptions(std::ifstream::failbit | std::ifstream::badbit);
-    weights_size_ = file_.seekg(0, std::ios::end).tellg();
-  } catch (std::ifstream::failure& e) {
+    weights_size_ = std::filesystem::file_size(filename);
+  } catch (const std::exception& e) {
     ORT_THROW("Error: Failed to open weight file at ", filename.string(), " ", e.what());
   }
 }
@@ -35,6 +35,32 @@ void SharedContext::SharedWeights::WeightsFile::load_weights(size_t file_offset,
   file_.read(reinterpret_cast<char*>(data), size);
 }
 
+void* SharedContext::SharedWeights::WeightsFile::TryGetOrCreateDeviceMapping(std::optional<ov::RemoteContext>& remote_context) {
+  std::string dev_name{};
+  if (remote_context) {
+    dev_name = remote_context->get_device_name();
+  }
+
+  auto [it, inserted] = imported_device_tensors_.emplace(dev_name, MappingContainer{});
+  if (inserted) {
+    if (dev_name == "NPU") {
+#if OPENVINO_VERSION_AT_LEAST(2025, 3)
+      // try to import the memory mapped file to remote tensor
+      ORT_ENFORCE(remote_context, "Error: Remote context is required for NPU device.");
+      auto npu_context = remote_context->as<ov::intel_npu::level_zero::ZeroContext>();
+      auto&& l0_tensor = npu_context.create_tensor(ov::element::Type_t::u8, {weights_size_}, ov::intel_npu::FileDescriptor(file_path_));
+      it->second = MappingContainer{.ptr_ = l0_tensor.get(), .tensor_ = l0_tensor};
+#endif
+    } else if (dev_name.empty()) {
+      // CPU/virtual device case, create a CPU tensor memory mapped from file
+      auto&& mmaped_tensor = ov::read_tensor_data(file_path_);
+      it->second = MappingContainer{.ptr_ = mmaped_tensor.data(), .tensor_ = mmaped_tensor};
+    }
+  }
+
+  return it->second.ptr_;
+}
+
 std::ostream& operator<<(std::ostream& stream, const SharedContext::SharedWeights::Metadata::Map& metadata) {
   try {
     stream << metadata.size();
@@ -405,29 +431,43 @@ ov::element::Type GetOpenVINOElementType(ONNX_NAMESPACE::TensorProto_DataType dt
 void CreateOVTensors(const std::string& device_name,
                      SharedContext::SharedWeights::Metadata::Map& metadata_map,
                      SharedContext::SharedWeights::WeightsFile& weights) {
+  // Get remote context if available
+  std::optional<ov::RemoteContext> opt_remote_ctx;
+  try {
+    opt_remote_ctx = OVCore::Get()->core.get_default_context(device_name);
+  } catch (const std::exception&) {
+    // Remote context not available
+  }
+
   for (auto& [key, value] : metadata_map) {
     if (value.tensor) continue;
 
     // Get element data type
     auto onnx_element_type = (ONNX_NAMESPACE::TensorProto_DataType)value.element_type;
-
-    ov::element::Type ov_elementType = GetOpenVINOElementType(onnx_element_type);  // Map to OpenVINO data type
-
-    // Create OpenVINO Tensor
-    if (device_name == "NPU") {
-      // Use remote tensors
-      auto npu_context = OVCore::Get()->core.get_default_context("NPU").as<ov::intel_npu::level_zero::ZeroContext>();
-      auto&& remote_tensor = npu_context.create_l0_host_tensor(ov_elementType, value.dimensions, ov::intel_npu::TensorType::INPUT);
-
-      // Copy data to remote tensor
-      weights.load_weights(value.data_offset, remote_tensor.get(), value.size);
-      value.tensor = std::make_shared<ov::Tensor>(remote_tensor);
+    ov::element::Type ov_elementType = GetOpenVINOElementType(onnx_element_type);
+
+    // Try to get memory-mapped weights
+    ov::Tensor tensor;
+    uint8_t* mmaped_weights = static_cast<uint8_t*>(weights.TryGetOrCreateDeviceMapping(opt_remote_ctx));
+
+    if (mmaped_weights) {
+      // We have memory mapped weights. Create a Tensor view into it for this value.
+      ORT_ENFORCE(value.data_offset < weights.Size() &&
+                      value.size <= weights.Size() &&
+                      (value.data_offset <= weights.Size() - value.size),
+                  "File offset + size outside of external initializer file");
+      void* mmapped_offset = static_cast<void*>(mmaped_weights + value.data_offset);
+      tensor = ov::Tensor(ov_elementType, value.dimensions, mmapped_offset);
     } else {
-      // Use vanilla tensors
-      value.tensor = std::make_shared<ov::Tensor>(ov_elementType, value.dimensions);
-      weights.load_weights(value.data_offset, value.tensor->data(), value.size);
+      ORT_ENFORCE(opt_remote_ctx, "Expected either memory-mapped weights or a valid remote context, but neither is available for device: ", device_name);
+      // Can't mmap the file to device tensor, create a host tensor and copy the data
+      tensor = opt_remote_ctx->create_host_tensor(ov_elementType, value.dimensions);
+      ORT_ENFORCE(tensor.get_byte_size() == value.size, "Remote tensor size mismatch");
+      weights.load_weights(value.data_offset, tensor.data(), value.size);
     }
-    ORT_ENFORCE(value.tensor->get_byte_size() == value.size, "Unexpected tensor size mismatch");
+
+    ORT_ENFORCE(tensor.get_byte_size() == value.size, "Unexpected tensor size mismatch");
+    value.tensor = std::make_shared<ov::Tensor>(std::move(tensor));
   }
 }

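One detail worth noting in the new CreateOVTensors path is the range check on each initializer: it is written as offset <= file_size - size rather than offset + size <= file_size so the comparison cannot overflow. A standalone sketch of the same predicate, with a hypothetical helper name:

#include <cstddef>

// Returns true when [offset, offset + size) lies inside a file of file_size bytes,
// without ever computing offset + size (which could wrap around for size_t).
bool fits_in_file(std::size_t offset, std::size_t size, std::size_t file_size) {
  return offset < file_size &&
         size <= file_size &&
         offset <= file_size - size;
}
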
onnxruntime/core/providers/openvino/contexts.h

Lines changed: 8 additions & 0 deletions
@@ -55,10 +55,18 @@ class SharedContext : public WeakSingleton<SharedContext> {
     explicit WeightsFile(std::filesystem::path filename);
 
     void load_weights(size_t file_offset, void* data, size_t size);
+    void* TryGetOrCreateDeviceMapping(std::optional<ov::RemoteContext>& remote_context);
+    size_t Size() const { return weights_size_; }
 
    private:
     std::ifstream file_;
+    std::filesystem::path file_path_;
     size_t weights_size_;
+    struct MappingContainer {
+      void* ptr_{nullptr};
+      ov::Tensor tensor_;
+    };
+    std::map<std::string, MappingContainer> imported_device_tensors_;
   };
 
   void clear() {

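The new imported_device_tensors_ member caches at most one mapping per device name; TryGetOrCreateDeviceMapping relies on std::map::emplace only constructing the entry when the key is new. A minimal sketch of that get-or-create pattern, using hypothetical stand-in types and names:

#include <map>
#include <string>

struct Mapping {
  void* ptr{nullptr};  // stands in for MappingContainer in this sketch
};

std::map<std::string, Mapping> cache;

void* get_or_create(const std::string& device) {
  // emplace is a no-op when the key already exists, so the expensive
  // mapping/import work runs at most once per device name.
  auto [it, inserted] = cache.emplace(device, Mapping{});
  if (inserted) {
    // first request for this device: populate it->second here
  }
  return it->second.ptr;
}
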
onnxruntime/core/providers/openvino/ov_interface.h

Lines changed: 11 additions & 0 deletions
@@ -21,6 +21,17 @@
 
 #include <string>
 
+// Helper macro to test OpenVINO version at compile time.
+// Usage: #if OPENVINO_VERSION_AT_LEAST(2025, 3)
+// Falls back to 0 if OPENVINO_VERSION_MAJOR/MINOR are not defined.
+#if defined(OPENVINO_VERSION_MAJOR) && defined(OPENVINO_VERSION_MINOR)
+#define OPENVINO_VERSION_AT_LEAST(major, minor) \
+  ((OPENVINO_VERSION_MAJOR > (major)) ||        \
+   (OPENVINO_VERSION_MAJOR == (major) && OPENVINO_VERSION_MINOR >= (minor)))
+#else
+#define OPENVINO_VERSION_AT_LEAST(major, minor) 0
+#endif
+
 namespace onnxruntime {
 namespace openvino_ep {
 class OVCore;

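As a usage note, the macro degrades gracefully when the version macros are absent: OPENVINO_VERSION_AT_LEAST(x, y) then expands to 0 and the guarded code is simply compiled out, which is what lets the NPU file-descriptor import above require OpenVINO 2025.3 without breaking older builds. A small hedged example (hypothetical function, not from the PR; assumes ov_interface.h or an equivalent macro definition is visible):

#include <cstdio>

void report_weight_loading_path() {
#if OPENVINO_VERSION_AT_LEAST(2025, 3)
  std::puts("file-descriptor import path compiled in");
#else
  std::puts("copy-based fallback compiled in");
#endif
}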