diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index 99f28439db53a..989d1022f1d7b 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -21,6 +21,7 @@ #include "core/providers/openvino/ov_versions/capability.h" #include "core/providers/openvino/qdq_transformations/qdq_stripping.h" #include "core/providers/openvino/qdq_transformations/qdq_scales_fix.h" +#include "../../framework/tensorprotoutils.h" namespace onnxruntime { namespace openvino_ep { @@ -453,6 +454,80 @@ static void DumpOpenVINOEPModel([[maybe_unused]] const std::filesystem::path& on #endif } +// this is a helper function to set the data fields, it duplicates ExternalDataInfo::SetExternalLocationToProto +// but we cannot use that function as it is not part of public provider api. +static void SetExternalDataFields(ONNX_NAMESPACE::TensorProto* proto_init, const void* data_ptr, int64_t data_size) { + static constexpr const char* ORT_INTERNAL_MEM_INITIALIZER = "*/_ORT_MEM_ADDR_/*"; + auto* external_data = proto_init->mutable_external_data(); + bool found_location = false, found_offset = false, found_length = false; + const int ext_data_size = external_data->size(); + proto_init->set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation::TensorProto_DataLocation_EXTERNAL); + + for (int j = 0; j < ext_data_size; ++j) { + auto& ext_entry = external_data->at(j); + auto& key = *ext_entry.mutable_key(); + if (key == "location") { + *ext_entry.mutable_value() = ORT_INTERNAL_MEM_INITIALIZER; + found_location = true; + } else if (key == "offset") { + *ext_entry.mutable_value() = std::to_string(reinterpret_cast<uintptr_t>(data_ptr)); + found_offset = true; + } else if (key == "length") { + *ext_entry.mutable_value() = std::to_string(data_size); + found_length = true; + } + } + + if (!found_location) { + auto* new_entry = external_data->Add(); + *new_entry->mutable_key() = 
"location"; + *new_entry->mutable_value() = ORT_INTERNAL_MEM_INITIALIZER; + } + if (!found_offset) { + auto* new_entry = external_data->Add(); + *new_entry->mutable_key() = "offset"; + *new_entry->mutable_value() = std::to_string(reinterpret_cast<uintptr_t>(data_ptr)); + } + if (!found_length) { + auto* new_entry = external_data->Add(); + *new_entry->mutable_key() = "length"; + *new_entry->mutable_value() = std::to_string(data_size); + } +} + +static void ReadExternalDataFields(const ONNX_NAMESPACE::TensorProto* src_init, std::string& location, size_t& offset, size_t& length) { + // Remove constness as we need to use mutable_external_data() to get the entries to read. + // The entries themselves are not modified... + auto& mutable_proto = *const_cast<ONNX_NAMESPACE::TensorProto*>(src_init); + auto* entry_protos = mutable_proto.mutable_external_data(); + for (int i = 0; i < entry_protos->size(); i++) { + auto& string_entry_proto{entry_protos->at(i)}; + const auto& pb_key{*(string_entry_proto.mutable_key())}; + const auto& pb_value{*(string_entry_proto.mutable_value())}; + if (pb_key == "location") { + location = pb_value; + } else if (pb_key == "offset") { + const auto res = std::from_chars(pb_value.data(), pb_value.data() + pb_value.size(), offset); + if (res.ec != std::errc()) { + std::ostringstream err_msg; + err_msg << "External data in memory has invalid offset field: " + << src_init->name() << "], location: " << location + << ", offset: " << pb_value; + ORT_THROW(err_msg.str()); + } + } else if (pb_key == "length") { + const auto res = std::from_chars(pb_value.data(), pb_value.data() + pb_value.size(), length); + if (res.ec != std::errc()) { + std::ostringstream err_msg; + err_msg << "External data in memory has invalid length field: " + << src_init->name() << "], location: " << location + << ", length: " << pb_value; + ORT_THROW(err_msg.str()); + } + } + } +} + std::unique_ptr<ONNX_NAMESPACE::ModelProto> BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, const onnxruntime::GraphViewer& subgraph, @@ 
-529,12 +604,98 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, return model_proto; } else { LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP QDQ optimization pass is disabled"; + + // scan ext initializers: + std::unordered_map<std::string, std::pair<size_t, size_t>> external_initializers_offset_and_length; + std::string tempLocation; + size_t extInitializerTotalSize = 0; + if (session_context_.has_external_weights) { + auto allInitializers = subgraph.GetAllInitializedTensors(); + for (auto& [name, tp] : allInitializers) { + if (utils::HasExternalDataInMemory(*tp)) { + size_t offset = 0; + size_t length = 0; + ReadExternalDataFields(tp, tempLocation, offset, length); + extInitializerTotalSize += length; + external_initializers_offset_and_length[name] = {offset, length}; + } + } + } + + // when we have external weights in memory, the model proto will actually embed those + // and bloat the serialized string. We can avoid that by not including the data in the proto + // but then we have to update those initializers and set the external_data fields to mem_addr tag... + // proto is limited to 2GB, but let's use 32MB as threshold to be conservative and still gain some memory reductions. 
+#if (((OPENVINO_VERSION_MAJOR == 2025) && (OPENVINO_VERSION_MINOR > 3)) || (OPENVINO_VERSION_MAJOR > 2025)) + constexpr size_t MAX_EMBEDDED_INITIALIZER_SIZE = 1024 * 1024 * 32; + const bool include_initializer_data_in_proto = !(session_context_.has_external_weights && + external_initializers_offset_and_length.size() > 1 && + extInitializerTotalSize >= MAX_EMBEDDED_INITIALIZER_SIZE); +#else + const bool include_initializer_data_in_proto = true; +#endif + + auto model = subgraph.CreateModel(logger); auto model_proto = model->ToProto(); model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION); - subgraph.ToProto(*model_proto->mutable_graph(), true, true); + subgraph.ToProto(*model_proto->mutable_graph(), /*include_initializers*/true, + /*include_outer_scope_args*/true, /*execution_order*/0, /*include_initializer_data*/include_initializer_data_in_proto); + print_model_proto_duration(); + + if (!include_initializer_data_in_proto) { + LOGS(logger, INFO) << "Initializer data is not included in the model proto. 
Updating metadata..., total size " << extInitializerTotalSize / (1024 * 1024) << " MB in " << external_initializers_offset_and_length.size() << " initializers"; + auto* graph_proto = model_proto->mutable_graph(); + auto* proto_initializers = graph_proto->mutable_initializer(); + + std::unordered_map<std::string, ONNX_NAMESPACE::TensorProto*> proto_initializer_map; + for (int i = 0, n = proto_initializers->size(); i < n; ++i) { + auto& proto_init = proto_initializers->at(i); + proto_initializer_map[proto_init.name()] = &proto_init; + } + + for (const auto& [name, src_init] : subgraph.GetAllInitializedTensors()) { + auto it = proto_initializer_map.find(name); + if (it == proto_initializer_map.end()) + continue; + + auto* proto_init = it->second; + + // If the proto initializer is missing data, fill it in + if (!proto_init->has_raw_data() && src_init->has_raw_data()) { + *proto_init->mutable_raw_data() = src_init->raw_data(); + } + + // Only set in-memory external_data fields if the data is in memory + if (src_init->has_raw_data()) { + LOGS(logger, VERBOSE) << "In-memory initializer RAW: " + << src_init->name() + << ", data_type: " << src_init->data_type() + << ", raw_data size: " << src_init->raw_data().size(); + + SetExternalDataFields(proto_init, src_init->raw_data().data(), src_init->raw_data().size()); + } else if (onnxruntime::utils::HasExternalDataInMemory(*src_init)) { + auto it_ext = external_initializers_offset_and_length.find(name); + if (it_ext == external_initializers_offset_and_length.end()) { + std::ostringstream err_msg; + err_msg << "Initializer marked as external in memory but missing offset/length info: " << src_init->name(); + ORT_THROW(err_msg.str()); + } + const size_t offset = it_ext->second.first; + const size_t length = it_ext->second.second; + + LOGS(logger, VERBOSE) << "In-memory initializer EXT: " << src_init->name() << ", size: " << length; + + SetExternalDataFields(proto_init, (const void*)offset, length); + } else { + LOGS(logger, VERBOSE) << "File-based initializer: " << 
src_init->name() << ", data_type: " << src_init->data_type(); + } + } + } + DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node); + return model_proto; } } diff --git a/onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc b/onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc new file mode 100644 index 0000000000000..21ec61c2d2e3f --- /dev/null +++ b/onnxruntime/test/providers/openvino/openvino_ep_ext_init.cc @@ -0,0 +1,215 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include <filesystem> +#include <fstream> +#include <vector> + +#include "core/session/onnxruntime_cxx_api.h" + +#include "test/util/include/test/test_environment.h" +#include "test/unittest_util/qdq_test_utils.h" + +#include "gtest/gtest.h" +#include "gmock/gmock.h" +#include "onnxruntime_session_options_config_keys.h" + +using namespace ONNX_NAMESPACE; +using namespace onnxruntime::logging; + +extern std::unique_ptr<Ort::Env> ort_env; + +class OVEP_ExtInit_Tests : public ::testing::TestWithParam<std::string> {}; + +namespace { + +std::vector<char> LoadFileToMemory(const std::string& path) { + std::ifstream file(path, std::ios::binary | std::ios::ate); + if (!file.is_open()) { + return std::vector<char>(); + } + std::streamsize size = file.tellg(); + file.seekg(0, std::ios::beg); + std::vector<char> buffer(static_cast<size_t>(size)); + if (!file.read(reinterpret_cast<char*>(buffer.data()), size)) { + return std::vector<char>(); + } + return buffer; +} + +auto ProbeDevice(const std::string& device) { + static std::map<std::string, bool> is_present; + if (is_present.find(device) == is_present.end()) { + Ort::SessionOptions sessionOptions; + std::unordered_map<std::string, std::string> ov_options; + ov_options["device_type"] = device; + try { + sessionOptions.AppendExecutionProvider_OpenVINO_V2(ov_options); + is_present[device] = true; + } catch (...) 
{ + is_present[device] = false; + } + } + return is_present[device]; +} +} // namespace + +namespace onnxruntime { +namespace test { + +// this test requires OV 2025.4+ to run, currently CI uses OV 2025.2, so the test will be disabled until OV is updated +TEST_P(OVEP_ExtInit_Tests, DISABLED_ModelFromExtInit) { + const auto& device = GetParam(); + if (!ProbeDevice(device)) + GTEST_SKIP() << device + " is not available on this machine"; + + // Model and weights file paths + const std::string model_path = "ovep_ext_init_test.onnx"; + const std::string weights_path = "ovep_ext_init_test.onnx.data"; + const size_t num_initializers = 8; + const size_t floats_per_initializer = 64 * 1024 * 1024; // 64 million floats per initializer, 256MB + const size_t total_floats = num_initializers * floats_per_initializer; + const size_t total_bytes = total_floats * sizeof(float); + // min size threshold for new logic with ext initializers + ASSERT_GE(total_bytes, 32 * 1024 * 1024); + + // 1. Create initializers + std::vector<std::vector<float>> initializer_data; + for (size_t i = 0; i < num_initializers; ++i) + initializer_data.emplace_back(floats_per_initializer, static_cast<float>(i + 1)); // W0:1, W1:2... + + // 2. 
Build ONNX model with 8 external initializers, and 8 ADD nodes + { + ModelProto model_proto; + model_proto.set_ir_version(7); + model_proto.set_producer_name("openvino_extinit_test"); + model_proto.set_producer_version("1.0"); + model_proto.set_domain(""); + model_proto.set_model_version(1); + + auto* graph = model_proto.mutable_graph(); + graph->set_name("TestGraph"); + + // Input: shape [floats_per_initializer] + auto* input = graph->add_input(); + input->set_name("X"); + auto* input_type = input->mutable_type()->mutable_tensor_type(); + input_type->set_elem_type(TensorProto_DataType_FLOAT); + input_type->mutable_shape()->add_dim()->set_dim_value(floats_per_initializer); + + // Output: shape [floats_per_initializer] + auto* output = graph->add_output(); + output->set_name("Y"); + auto* output_type = output->mutable_type()->mutable_tensor_type(); + output_type->set_elem_type(TensorProto_DataType_FLOAT); + output_type->mutable_shape()->add_dim()->set_dim_value(floats_per_initializer); + + auto* opset_import = model_proto.add_opset_import(); + opset_import->set_domain(""); + opset_import->set_version(19); + + // Add initializers as external data + size_t offset = 0; + std::vector<std::string> initializer_names; + for (size_t i = 0; i < num_initializers; ++i) { + std::string name = "W" + std::to_string(i); + initializer_names.push_back(name); + TensorProto* initializer = graph->add_initializer(); + initializer->set_name(name); + initializer->set_data_type(TensorProto_DataType_FLOAT); + initializer->add_dims(floats_per_initializer); + initializer->set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL); + auto* ext = initializer->add_external_data(); + ext->set_key("location"); + ext->set_value(weights_path); + ext = initializer->add_external_data(); + ext->set_key("offset"); + ext->set_value(std::to_string(offset)); + ext = initializer->add_external_data(); + ext->set_key("length"); + ext->set_value(std::to_string(floats_per_initializer * sizeof(float))); + offset += 
floats_per_initializer * sizeof(float); + } + + // nodes: X -> Add with Init[0] -> ... -> output Y + std::string prev_output = "X"; + std::string node_output; + for (size_t i = 0; i < num_initializers; ++i) { + node_output = (i == num_initializers - 1) ? "Y" : "A" + std::to_string(i); + auto* add_node = graph->add_node(); + add_node->set_op_type("Add"); + add_node->add_input(prev_output); + add_node->add_input(initializer_names[i]); + add_node->add_output(node_output); + prev_output = node_output; + } + + // Save model + std::ofstream model_file(model_path, std::ios::binary); + ASSERT_TRUE(model_proto.SerializeToOstream(&model_file)); + model_file.close(); + } + + // 3. Save weights file (concatenate all initializers) + { + std::ofstream weights_file(weights_path, std::ios::binary); + ASSERT_TRUE(weights_file.is_open()); + for (const auto& w : initializer_data) { + weights_file.write(reinterpret_cast<const char*>(w.data()), w.size() * sizeof(float)); + } + weights_file.close(); + } + + // 4. Load model and weights into memory + std::vector<char> model_data = LoadFileToMemory(model_path); + std::vector<char> weights_data = LoadFileToMemory(weights_path); + + // 5. Prepare external initializer info + PathString weights_name_path(weights_path.begin(), weights_path.end()); + std::vector<PathString> names_path = {weights_name_path}; + std::vector<char*> buffers = {reinterpret_cast<char*>(weights_data.data())}; + std::vector<size_t> buffer_sizes = {weights_data.size()}; + + // 6. Set up session options with OpenVINO + Ort::SessionOptions session_options; + session_options.AddConfigEntry(kOrtSessionOptionsDisableCPUEPFallback, "1"); + session_options.SetIntraOpNumThreads(1); + std::unordered_map<std::string, std::string> ov_options = { {"device_type", device } }; + session_options.AppendExecutionProvider_OpenVINO_V2(ov_options); + session_options.AddExternalInitializersFromFilesInMemory(names_path, buffers, buffer_sizes); + + // 7. Create session from memory + Ort::Session session(*ort_env, model_data.data(), model_data.size(), session_options); + + // 8. 
Run inference to verify weights are loaded + std::vector<float> input_data(floats_per_initializer, 2.0f); + std::vector<int64_t> input_shape = {static_cast<int64_t>(floats_per_initializer)}; + Ort::MemoryInfo mem_info = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtDeviceAllocator, OrtMemTypeDefault); + Ort::Value input_tensor = Ort::Value::CreateTensor<float>(mem_info, input_data.data(), input_data.size(), input_shape.data(), input_shape.size()); + + std::vector<const char*> input_names = {"X"}; + std::vector<const char*> output_names = {"Y"}; + std::vector<Ort::Value> output_tensors(1); + + session.Run(Ort::RunOptions{nullptr}, input_names.data(), &input_tensor, 1, output_names.data(), output_tensors.data(), 1); + + // Check output: should be input + W0 + W1 + W2... + auto* out_data = output_tensors[0].GetTensorMutableData<float>(); + float expected = input_data[0]; + for (size_t i = 0; i < num_initializers; ++i) { + expected += initializer_data[i][0]; + } + + for (size_t i = 0; i < floats_per_initializer; ++i) + ASSERT_FLOAT_EQ(out_data[i], expected); + + // Cleanup + std::filesystem::remove(model_path); + std::filesystem::remove(weights_path); +} +INSTANTIATE_TEST_SUITE_P(OVEP_Tests, + OVEP_ExtInit_Tests, + ::testing::Values("CPU", "GPU", "NPU")); + +} // namespace test +} // namespace onnxruntime