Skip to content

Commit 75f87cd

Browse files
committed
Support calling custom method names via METHOD_TO_CALL (fixes triton-inference-server/server#5209)
1 parent 588c6ac commit 75f87cd

File tree

2 files changed

+55
-5
lines changed

2 files changed

+55
-5
lines changed

README.md

+14
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,20 @@ complex execution modes and dynamic shapes. If not specified, all are enabled by
206206

207207
`ENABLE_TENSOR_FUSER`
208208

209+
* `METHOD_TO_CALL`: String parameter specifying which method to call on the PyTorch model.
210+
Default value is `forward`.
211+
212+
The section of model config file specifying this parameter will look like:
213+
214+
```
215+
parameters: {
216+
key: "METHOD_TO_CALL"
217+
value: {
218+
string_value: "custom_method"
219+
}
220+
}
221+
```
222+
209223
### Important Note
210224

211225
* The execution of PyTorch model on GPU is asynchronous in nature. See

src/libtorch.cc

+41-5
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ class ModelState : public BackendModel {
103103

104104
bool EnabledWeightSharing() { return enable_weight_sharing_; }
105105
const std::vector<std::string>& ModelOutputs() { return output_names_; }
106+
const std::string& MethodToCall() { return method_to_call_; }
106107

107108
private:
108109
ModelState(TRITONBACKEND_Model* triton_model);
@@ -145,6 +146,10 @@ class ModelState : public BackendModel {
145146
// List of all the outputs specified in the output section of model
146147
// configuration.
147148
std::vector<std::string> output_names_;
149+
150+
// Method to call on PyTorch Module.
151+
// Defaults to "forward".
152+
std::string method_to_call_;
148153
};
149154

150155
TRITONSERVER_Error*
@@ -180,7 +185,8 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model)
180185
enable_weight_sharing_(false), enable_tensor_fuser_pair_({false, true}),
181186
enable_jit_profiling_pair_({false, true}),
182187
enable_jit_executor_pair_({false, true}),
183-
enable_nvfuser_pair_({false, false})
188+
enable_nvfuser_pair_({false, false}),
189+
method_to_call_("forward")
184190
{
185191
output_names_.clear();
186192

@@ -454,6 +460,29 @@ ModelState::ParseParameters()
454460
" for model instance '" + Name() + "'")
455461
.c_str());
456462
}
463+
464+
// If 'METHOD_TO_CALL' is not present in 'parameters' then
465+
// 'method_to_call_' keeps its default value of "forward".
466+
std::string method_to_call = "forward";
467+
err = GetParameterValue(params, "METHOD_TO_CALL", &method_to_call);
468+
if (err != nullptr) {
469+
if (TRITONSERVER_ErrorCode(err) != TRITONSERVER_ERROR_NOT_FOUND) {
470+
return err;
471+
} else {
472+
LOG_MESSAGE(
473+
TRITONSERVER_LOG_INFO, (std::string("method_to_call is not specified") +
474+
" for model instance '" + Name() + "'")
475+
.c_str());
476+
TRITONSERVER_ErrorDelete(err);
477+
}
478+
} else {
479+
method_to_call_ = method_to_call;
480+
LOG_MESSAGE(
481+
TRITONSERVER_LOG_INFO, (std::string("method_to_call is ") +
482+
method_to_call_ +
483+
" for model instance '" + Name() + "'")
484+
.c_str());
485+
}
457486
}
458487

459488
return nullptr;
@@ -764,7 +793,7 @@ ModelInstanceState::ValidateInputs(const size_t expected_input_cnt)
764793
// configuration specifies only those.
765794
std::vector<std::string> allowed_inputs;
766795

767-
const torch::jit::Method& method = torch_model_->get_method("forward");
796+
const torch::jit::Method& method = torch_model_->get_method(model_state_->MethodToCall());
768797
const auto& schema = method.function().getSchema();
769798
const std::vector<c10::Argument>& arguments = schema.arguments();
770799

@@ -1324,16 +1353,23 @@ ModelInstanceState::Execute(
13241353
torch::NoGradGuard no_grad;
13251354

13261355
// If input is a dictionary, prepare dictionary from 'input_tensors'.
1356+
std::string method_to_call = model_state_->MethodToCall();
13271357
if (is_dict_input_) {
13281358
torch::Dict<std::string, torch::Tensor> input_dict;
13291359
for (auto& input_index : input_index_map_) {
13301360
torch::jit::IValue ival = (*input_tensors)[input_index.second];
13311361
input_dict.insert(input_index.first, ival.toTensor());
13321362
}
1333-
std::vector<torch::jit::IValue> input_dict_ivalue = {input_dict};
1334-
model_outputs_ = torch_model_->forward(input_dict_ivalue);
1363+
auto typ = c10::DictType::create(c10::StringType::get(), c10::TensorType::get());
1364+
auto inp = c10::impl::GenericList(typ);
1365+
inp.emplace_back(input_dict);
1366+
model_outputs_ = torch_model_->run_method(method_to_call, inp);
13351367
} else {
1336-
model_outputs_ = torch_model_->forward(*input_tensors);
1368+
auto inp = c10::impl::GenericList(c10::TensorType::get());
1369+
for (auto& input_tensor : *input_tensors) {
1370+
inp.emplace_back(input_tensor.toTensor());
1371+
}
1372+
model_outputs_ = torch_model_->run_method(method_to_call, inp);
13371373
}
13381374

13391375
if (model_outputs_.isTuple()) {

0 commit comments

Comments
 (0)