|
56 | 56 | #include <cuda_runtime_api.h>
|
57 | 57 | #endif // TRITON_ENABLE_GPU
|
58 | 58 |
|
| 59 | +// for thread control |
| 60 | +// https://pytorch.org/docs/stable/notes/cpu_threading_torchscript_inference.html#runtime-api |
| 61 | +// https://github.com/pytorch/pytorch/blob/v2.2.1-rc3/aten/src/ATen/Parallel.h#L133 |
| 62 | +#include <ATen/Parallel.h> |
| 63 | + |
| 64 | + |
59 | 65 | //
|
60 | 66 | // PyTorch C++ (LibTorch) Backend that implements the TRITONBACKEND API.
|
61 | 67 | //
|
@@ -465,6 +471,76 @@ ModelState::ParseParameters()
|
465 | 471 | " for model instance '" + Name() + "'")
|
466 | 472 | .c_str());
|
467 | 473 | }
|
| 474 | + |
| 475 | + // If "INTRA_OP_THREAD_COUNT" is not present in 'parameters' then no update |
| 476 | + // is made to 'intra_op_thread_count', which by default will take all |
| 477 | + // threads |
| 478 | + int intra_op_thread_count = -1; |
| 479 | + err = ParseParameterInt( |
| 480 | + params, "INTRA_OP_THREAD_COUNT", &intra_op_thread_count); |
| 481 | + if (err != nullptr) { |
| 482 | + if (TRITONSERVER_ErrorCode(err) != TRITONSERVER_ERROR_NOT_FOUND) { |
| 483 | + return err; |
| 484 | + } else { |
| 485 | + TRITONSERVER_ErrorDelete(err); |
| 486 | + } |
| 487 | + } else { |
| 488 | + if (intra_op_thread_count > 0) { |
| 489 | + try { |
| 490 | + at::set_num_threads(intra_op_thread_count); |
| 491 | + LOG_MESSAGE( |
| 492 | + TRITONSERVER_LOG_INFO, |
| 493 | + (std::string("Intra op thread count is set to ") + |
| 494 | + std::to_string(intra_op_thread_count) + " for model instance '" + |
| 495 | + Name() + "'") |
| 496 | + .c_str()); |
| 497 | + } catch (c10::Error &e) { |
| 498 | + LOG_MESSAGE( |
| 499 | + TRITONSERVER_LOG_WARN, |
| 500 | +            (std::string("Could not set intra op thread count to ") + |
| 501 | + std::to_string(intra_op_thread_count) + " for model instance '" + |
| 502 | + Name() + "'. Using value: " + std::to_string(at::get_num_threads())) |
| 503 | + .c_str()); |
| 504 | + |
| 505 | + |
| 506 | + } |
| 507 | + } |
| 508 | + } |
| 509 | + |
| 510 | + // If "INTER_OP_THREAD_COUNT" is not present in 'parameters' then no update |
| 511 | + // is made to 'inter_op_thread_count', which by default will take all |
| 512 | + // threads |
| 513 | + int inter_op_thread_count = -1; |
| 514 | + err = ParseParameterInt( |
| 515 | + params, "INTER_OP_THREAD_COUNT", &inter_op_thread_count); |
| 516 | + if (err != nullptr) { |
| 517 | + if (TRITONSERVER_ErrorCode(err) != TRITONSERVER_ERROR_NOT_FOUND) { |
| 518 | + return err; |
| 519 | + } else { |
| 520 | + TRITONSERVER_ErrorDelete(err); |
| 521 | + } |
| 522 | + } else { |
| 523 | + if (inter_op_thread_count > 0) { |
| 524 | + try { |
| 525 | + at::set_num_interop_threads(inter_op_thread_count); |
| 526 | + LOG_MESSAGE( |
| 527 | + TRITONSERVER_LOG_INFO, |
| 528 | + (std::string("Inter op thread count is set to ") + |
| 529 | + std::to_string(inter_op_thread_count) + " for model instance '" + |
| 530 | + Name() + "'") |
| 531 | + .c_str()); |
| 532 | + } catch (c10::Error &e) { |
| 533 | + LOG_MESSAGE( |
| 534 | + TRITONSERVER_LOG_WARN, |
| 535 | +            (std::string("Could not set inter op thread count to ") + |
| 536 | +            std::to_string(inter_op_thread_count) + " for model instance '" + |
| 537 | + Name() + "'. Using value: " + std::to_string(at::get_num_interop_threads())) |
| 538 | + .c_str()); |
| 539 | + |
| 540 | + |
| 541 | + } |
| 542 | + } |
| 543 | + } |
468 | 544 | }
|
469 | 545 |
|
470 | 546 | return nullptr;
|
|
0 commit comments