Skip to content
Draft
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,15 @@ static const char* const kOrtSessionOptionsDisableQuantQDQ = "session.disable_qu
static const char* const kOrtSessionOptionsDisableQDQConstantFolding =
"session.disable_qdq_constant_folding";

// Constant folding produces new initializers (the folded outputs) that get added to the graph,
// which can increase the optimized model's memory footprint relative to the original model.
// This option caps the size, in bytes, of any single tensor produced by constant folding:
// a node whose folded output would exceed the cap is left unfolded.
// The value must be a non-negative integer written as a decimal string.
// The default, "0", disables the check entirely (outputs of any size are accepted).
static const char* const kOrtSessionOptionsConfigConstantFoldingNodeWeightSizeThreshold =
    "session.constant_folding_node_weight_size_threshold";

// It controls whether to enable Double QDQ remover and Identical Children Consolidation
// "0": not to disable. ORT does remove the middle 2 Nodes from a Q->(QD->Q)->QD pairs
// "1": disable. ORT doesn't remove the middle 2 Nodes from a Q->(QD->Q)->QD pairs
Expand Down
32 changes: 32 additions & 0 deletions onnxruntime/core/optimizer/constant_folding.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#include "core/optimizer/utils.h"
#include "core/framework/op_kernel.h"
#include "core/framework/tensorprotoutils.h"
#include "core/session/onnxruntime_session_options_config_keys.h"
#include "core/common/parse_string.h"

using namespace onnxruntime::common;

Expand Down Expand Up @@ -145,6 +147,18 @@ Status ConstantFolding::ApplyImpl(Graph& graph, bool& modified, int graph_level,
GraphViewer graph_viewer(graph);
auto& order = graph_viewer.GetNodesInTopologicalOrder();

// Read the optional size threshold for constant folding. A value of 0 (the default) means no limit.
size_t output_size_threshold = 0;
{
const std::string threshold_str = config_options_.GetConfigOrDefault(
kOrtSessionOptionsConfigConstantFoldingNodeWeightSizeThreshold, "0");
if (!TryParseStringWithClassicLocale(threshold_str, output_size_threshold)) {
LOGS(logger, WARNING) << "Failed to parse constant folding size threshold from config value '"
<< threshold_str << "'. Using no threshold.";
output_size_threshold = 0;
}
}

#if !defined(DISABLE_SPARSE_TENSORS)
std::function<bool(const std::string&)> is_sparse_initializer_check = [&graph](const std::string& name) -> bool {
return graph.IsSparseInitializer(name);
Expand Down Expand Up @@ -336,6 +350,24 @@ Status ConstantFolding::ApplyImpl(Graph& graph, bool& modified, int graph_level,
}
}

// If a size threshold was configured, check whether any output tensor exceeds it.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot you need to move the test before the computation happens and use the estimated size before computation

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done in 38c9da7. The check now runs before the computation: for each output NodeArg, the elem type and all concrete dim values are read from the TypeAsProto, and estimated_size = num_elements * elem_size is compared against the threshold before OptimizerExecutionFrame is created or the kernel is run. If any dim is symbolic/unknown the pre-check is skipped for that output (safe fallback).

// Skipping large outputs prevents the optimized model from having a much larger
// memory footprint than the original model.
if (converted_to_constant && output_size_threshold > 0) {
for (const OrtValue& ort_value : fetches) {
if (ort_value.IsTensor()) {
const size_t tensor_size = ort_value.Get<Tensor>().SizeInBytes();
if (tensor_size > output_size_threshold) {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot if inputs are deleted, you need to take that into account and only consider the increase

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done in dea8bad. The threshold check now computes the net memory increase: total_output_size - freed_input_size, where freed_input_size sums the sizes of input initializers that are exclusively consumed by this node (consumer count == 1). If the net increase ≤ threshold the node is folded; if > threshold it is skipped. For the Unsqueeze model, inputs are exclusively consumed (same size as outputs), so net = 0 and any non-zero threshold still allows folding. The updated test reflects this: Case 2 (1-byte threshold) now asserts 0 Unsqueeze remaining. Cases 3a/3b add a Tile graph with a genuine 788-byte net increase and verify the threshold blocks (100 bytes) or allows (1000 bytes) folding.

LOGS(logger, INFO) << "Skipping constant folding for " << node->OpType()
<< " node '" << node->Name() << "': output size " << tensor_size
<< " bytes exceeds the threshold of " << output_size_threshold << " bytes.";
converted_to_constant = false;
break;
}
}
}
}

if (converted_to_constant) {
for (size_t fetch_idx = 0; fetch_idx < fetches.size(); ++fetch_idx) {
OrtValue& ort_value = fetches[fetch_idx];
Expand Down
44 changes: 44 additions & 0 deletions onnxruntime/test/optimizer/graph_transform_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,50 @@ TEST_F(GraphTransformationTests, ConstantFolding) {
ASSERT_TRUE(op_to_count["Unsqueeze"] == 0);
}

// Verify that the constant folding size-threshold session config option is honored.
// With the default ("0", i.e. no threshold) every Unsqueeze node is folded away;
// with a 1-byte threshold the folded outputs are too large, so folding is skipped
// and both Unsqueeze nodes survive.
TEST_F(GraphTransformationTests, ConstantFoldingWithSizeThreshold) {
  constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "fusion/fuse-conv-bn-mul-add-unsqueeze.onnx";

  // Case 1: default config (no threshold) — both Unsqueeze nodes should be folded.
  {
    std::shared_ptr<Model> loaded_model;
    ASSERT_STATUS_OK(Model::Load(model_uri, loaded_model, nullptr, *logger_));
    Graph& main_graph = loaded_model->MainGraph();
    ASSERT_EQ(CountOpsInGraph(main_graph)["Unsqueeze"], 2);

    auto cpu_ep = std::make_unique<CPUExecutionProvider>(CPUExecutionProviderInfo());
    onnxruntime::GraphTransformerManager transformer_mgr{5};
    const ConfigOptions default_config;
    ASSERT_STATUS_OK(transformer_mgr.Register(
        std::make_unique<ConstantFolding>(*cpu_ep, false /*skip_dequantize_linear*/, default_config),
        TransformerLevel::Level1));
    ASSERT_STATUS_OK(transformer_mgr.ApplyTransformers(main_graph, TransformerLevel::Level1, *logger_));
    ASSERT_EQ(CountOpsInGraph(main_graph)["Unsqueeze"], 0);
  }

  // Case 2: 1-byte threshold — every folded output exceeds it, so no node is folded.
  {
    std::shared_ptr<Model> loaded_model;
    ASSERT_STATUS_OK(Model::Load(model_uri, loaded_model, nullptr, *logger_));
    Graph& main_graph = loaded_model->MainGraph();
    ASSERT_EQ(CountOpsInGraph(main_graph)["Unsqueeze"], 2);

    auto cpu_ep = std::make_unique<CPUExecutionProvider>(CPUExecutionProviderInfo());
    onnxruntime::GraphTransformerManager transformer_mgr{5};
    ConfigOptions limited_config;
    ASSERT_STATUS_OK(limited_config.AddConfigEntry(
        kOrtSessionOptionsConfigConstantFoldingNodeWeightSizeThreshold, "1"));
    ASSERT_STATUS_OK(transformer_mgr.Register(
        std::make_unique<ConstantFolding>(*cpu_ep, false /*skip_dequantize_linear*/, limited_config),
        TransformerLevel::Level1));
    ASSERT_STATUS_OK(transformer_mgr.ApplyTransformers(main_graph, TransformerLevel::Level1, *logger_));
    // Outputs are larger than 1 byte, so the transformer must leave the graph unchanged.
    ASSERT_EQ(CountOpsInGraph(main_graph)["Unsqueeze"], 2);
  }
}

TEST_F(GraphTransformationTests, ConstantFoldingNodesOnDifferentEP) {
constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "fusion/fuse-conv-bn-mul-add-unsqueeze.onnx";
std::shared_ptr<Model> model;
Expand Down