
Commit a876432

soulitzer authored and pytorchmergebot committed
Expose torch._will_engine_execute_node (pytorch#84773)
Addresses: pytorch#83617

This PR adds a way to query the TLS graph task's exec_info, a map from Node to a bool indicating whether that node will be executed in the current backward pass (as determined by the inputs= argument of .grad or .backward).

- this works with both custom Function nodes and normal codegened nodes
- to be able to verify that the pyobject passed is an actual node, we now store pointers to the PyTypeObjects in a set on registration
- error out when .backward is called without inputs= to avoid silently returning True

Alternatives:

- not sure if it is possible to bind to Python from a raw pointer to Node; at least we wouldn't be able to use the existing logic, and the Python object should only hold a weak reference to the Node
- other solutions to the motivating issue seem to require more extensive modification to the engine

See the linked issue for an example of usage.

Pull Request resolved: pytorch#84773
Approved by: https://github.com/albanD
1 parent 8dd4542 commit a876432
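
For orientation, a minimal usage sketch distilled from the test added below; the query is only valid while a backward pass is running, e.g. inside a hook (tensor names and shapes are illustrative, not part of this commit):

import torch

a = torch.randn(2, 3, requires_grad=True)
a2 = torch.randn(2, 3, requires_grad=True)
b = a * a2  # b.grad_fn lies on the path to a; a2's AccumulateGrad does not

def hook(grad):
    # True: b.grad_fn must run to reach inputs=(a,)
    print(torch._C._will_engine_execute_node(b.grad_fn))
    # False: a2's gradient is not requested, so its AccumulateGrad is pruned
    acc_a2 = a2.clone().grad_fn.next_functions[0][0]
    print(torch._C._will_engine_execute_node(acc_a2))

b.register_hook(hook)
b.sum().backward(inputs=(a,))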

File tree

6 files changed: +155 -6 lines changed

test/test_autograd.py

+73
@@ -394,6 +394,79 @@ def test_not_implemented_fwad(self):
             # if forward AD ends up being implemented for torch.igamma, choose a different op
             torch.igamma(dual_x, dual_x)

+    def test_will_engine_execute_node(self):
+        counter = [0]
+
+        class MyFunction(Function):
+            @staticmethod
+            def forward(ctx, x):
+                return x * 2
+
+            @staticmethod
+            def backward(ctx, gO):
+                return gO * 2
+
+        def get_grad_fn(t):
+            if t.requires_grad and t.grad_fn is None:
+                return t.clone().grad_fn.next_functions[0][0]
+            else:
+                return t.grad_fn
+
+        a = torch.randn(2, 3, 4, requires_grad=True)
+        a2 = torch.randn(2, 3, 4, requires_grad=True)
+        b = a * a2
+        b2 = b.cos()
+        c = MyFunction.apply(b)
+
+        should_execute = list(map(get_grad_fn, (a, b, c)))
+        should_not_execute = list(map(get_grad_fn, (a2, b2)))
+
+        def fn(x):
+            counter[0] += 1
+
+            for g in should_execute:
+                self.assertTrue(torch._C._will_engine_execute_node(g))
+
+            for g in should_not_execute:
+                self.assertFalse(torch._C._will_engine_execute_node(g))
+
+        b.register_hook(fn)
+        c.register_hook(fn)
+
+        # .backward(inputs=) is OK
+        out = c.sum()
+        torch.autograd.backward(out, inputs=(a,), retain_graph=True)
+        self.assertEqual(counter[0], 2)
+
+        # .backward() is OK
+        should_execute = list(map(get_grad_fn, (a, a2, b, c)))
+        should_not_execute = list(map(get_grad_fn, (b2,)))
+        torch.autograd.backward(out, retain_graph=True)
+
+        # .grad is NOT OK when leaf is passed (this is the current state, subject to change)
+        with self.assertRaisesRegex(RuntimeError, "are currently running autograd.grad()"):
+            torch.autograd.grad(out, (a,))
+
+        # .grad is OK when non-leaf is passed
+        a = torch.randn(1, 2, 3, requires_grad=True) * 2
+        b = a * 2
+
+        def fn(x):
+            # Check a non-leaf
+            counter[0] += 1
+            self.assertTrue(torch._C._will_engine_execute_node(b.grad_fn))
+        b.register_hook(fn)
+        counter[0] = 0
+        torch.autograd.grad(b.sum(), (a,))
+        self.assertEqual(counter[0], 1)
+
+        # Verify other errors are raised
+        with self.assertRaisesRegex(RuntimeError, "during the backward pass"):
+            torch._C._will_engine_execute_node(out.grad_fn)
+
+        with self.assertRaisesRegex(RuntimeError, "expects an grad_fn"):
+            torch._C._will_engine_execute_node(out)
+
     def test_accumulate_grad(self):
         grad_output = torch.ones(5, 5)

torch/csrc/Module.cpp

+54
@@ -38,8 +38,10 @@
 #include <torch/csrc/THP.h>
 #include <torch/csrc/TypeInfo.h>
 #include <torch/csrc/api/include/torch/python/init.h>
+#include <torch/csrc/autograd/python_cpp_function.h>
 #include <torch/csrc/autograd/python_enum_tag.h>
 #include <torch/csrc/autograd/python_fft_functions.h>
+#include <torch/csrc/autograd/python_function.h>
 #include <torch/csrc/autograd/python_legacy_variable.h>
 #include <torch/csrc/autograd/python_linalg_functions.h>
 #include <torch/csrc/autograd/python_nested_functions.h>
@@ -708,6 +710,54 @@ PyObject* THPModule_isEnabledXNNPACK(PyObject* _unused, PyObject* noargs) {
   Py_RETURN_FALSE;
 }

+PyObject* THPModule_willEngineExecuteNode(PyObject* _unused, PyObject* arg) {
+  HANDLE_TH_ERRORS
+  bool isTHPFunction = THPFunction_Check(arg);
+  bool isTHPCppFunction = torch::autograd::THPCppFunction_Check(arg);
+  THPUtils_assert(
+      isTHPFunction || isTHPCppFunction,
+      "_will_engine_execute_node expects an grad_fn, "
+      "but got %s",
+      THPUtils_typename(arg));
+  const auto exec_info = torch::autograd::get_current_graph_task_exec_info();
+  THPUtils_assert(
+      exec_info,
+      "_get_should_execute_nodes should only be called during the backward pass");
+  torch::autograd::Node* node;
+  std::shared_ptr<torch::autograd::Node> node_sp;
+  if (isTHPFunction) {
+    node_sp = ((THPFunction*)arg)->cdata.lock();
+    node = node_sp.get();
+  } else {
+    node = ((torch::autograd::THPCppFunction*)arg)->cdata.get();
+  }
+  if (exec_info->empty()) {
+    // .backward() without inputs= arg
+    const auto nodes_in_graph =
+        torch::autograd::get_current_graph_task_nodes_in_graph();
+    auto it = nodes_in_graph->find(node);
+    if (it == nodes_in_graph->end()) {
+      Py_RETURN_FALSE;
+    } else {
+      Py_RETURN_TRUE;
+    }
+  } else {
+    // .grad or .backward when inputs= is passed
+    auto it = exec_info->find(node);
+    if (it == exec_info->end() || !it->second.should_execute()) {
+      Py_RETURN_FALSE;
+    } else {
+      THPUtils_assert(
+          !(node->topological_nr() == 0 && it->second.captures_),
+          "A leaf node was passed to _will_engine_execute_node but we are "
+          "currently running autograd.grad(). This is currently not supported.");
+      Py_RETURN_TRUE;
+    }
+  }
+
+  END_HANDLE_TH_ERRORS
+}
+
 PyObject* THPModule_setDefaultMobileCPUAllocator(
     PyObject* _unused,
     PyObject* noargs) {
@@ -888,6 +938,10 @@ static PyMethodDef TorchMethods[] = {
     {"_set_qengine", THPModule_setQEngine, METH_O, nullptr},
     {"_supported_qengines", THPModule_supportedQEngines, METH_NOARGS, nullptr},
     {"_is_xnnpack_enabled", THPModule_isEnabledXNNPACK, METH_NOARGS, nullptr},
+    {"_will_engine_execute_node",
+     THPModule_willEngineExecuteNode,
+     METH_O,
+     nullptr},
     {"_set_default_mobile_cpu_allocator",
      THPModule_setDefaultMobileCPUAllocator,
      METH_NOARGS,
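
Note the two branches above: a plain .backward() leaves exec_info empty, so membership in nodes_in_graph_ decides the answer, while .grad() and .backward(inputs=) consult exec_info and additionally reject captured leaf nodes under autograd.grad(). A rough sketch of that last restriction, mirroring the new test (illustrative tensors, not part of this diff):

import torch

a = torch.randn(3, requires_grad=True)
b = a * 2

def hook(grad):
    acc_a = a.clone().grad_fn.next_functions[0][0]  # a's AccumulateGrad (a leaf node)
    # Raises RuntimeError: leaf nodes are unsupported while autograd.grad() runs
    torch._C._will_engine_execute_node(acc_a)

b.register_hook(hook)
torch.autograd.grad(b.sum(), (a,))  # the query above raises during this call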

torch/csrc/autograd/engine.cpp

+5 -2
@@ -381,6 +381,10 @@ get_current_graph_task_exec_info() {
   return current_graph_task ? &current_graph_task->exec_info_ : nullptr;
 }

+const std::unordered_set<Node*>* get_current_graph_task_nodes_in_graph() {
+  return current_graph_task ? &current_graph_task->nodes_in_graph_ : nullptr;
+}
+
 bool get_current_graph_task_keep_graph() {
   return current_graph_task ? current_graph_task->keep_graph_ : true;
 }
@@ -1006,7 +1010,6 @@ auto Engine::compute_dependencies(
     GraphTask& task,
     uint64_t min_topo_nr) -> void {
   // Computes the number of dependencies for each function which requires grad
-  std::unordered_set<Node*> seen;
   std::vector<Node*> queue{root};
   bool might_use_cuda = at::globalContext().hasCUDA();
   bool will_use_cuda = false;
@@ -1026,7 +1029,7 @@ auto Engine::compute_dependencies(
     for (const auto& edge : fn->next_edges()) {
       if (auto next_ptr = edge.function.get()) {
         dependencies[next_ptr] += 1;
-        const bool was_inserted = seen.insert(next_ptr).second;
+        const bool was_inserted = task.nodes_in_graph_.insert(next_ptr).second;
         if (was_inserted)
           queue.push_back(next_ptr);
       }

torch/csrc/autograd/graph_task.h

+4
@@ -31,6 +31,8 @@ struct GraphTask : std::enable_shared_from_this<GraphTask> {
   std::unordered_map<Node*, InputBuffer> not_ready_;
   std::unordered_map<Node*, int> dependencies_;

+  // Records the nodes that are in the graph
+  std::unordered_set<Node*> nodes_in_graph_;
   // Note [Exec info]
   // Exec info is created for each GraphTask, which allows filtering paths on
   // the graph that are not needed. It has a bit complicated semantics. If it's
@@ -186,6 +188,8 @@ class GraphTaskGuard {

 TORCH_API const std::unordered_map<Node*, GraphTask::ExecInfo>*
 get_current_graph_task_exec_info();
+TORCH_API const std::unordered_set<Node*>*
+get_current_graph_task_nodes_in_graph();
 TORCH_API bool get_current_graph_task_keep_graph();
 void add_node_to_current_graph_task_exec_info(Node* fn);

torch/csrc/autograd/python_cpp_function.cpp

+17 -4
@@ -203,7 +203,8 @@ PyTypeObject* _initFunctionPyTypeObject(
   return &type;
 }

-static std::unordered_map<std::type_index, THPObjectPtr> cpp_function_types;
+static std::unordered_map<std::type_index, THPObjectPtr> cpp_function_types_map;
+static std::unordered_set<PyTypeObject*> cpp_function_types_set;

 struct DefaultFunctionType {
   DefaultFunctionType() : type() {
@@ -231,10 +232,10 @@ PyObject* functionToPyObject(const std::shared_ptr<Node>& cdata) {
     Py_INCREF(cdata->pyobj());
   } else {
     auto& fn = *cdata;
-    auto it = cpp_function_types.find(std::type_index(typeid(fn)));
+    auto it = cpp_function_types_map.find(std::type_index(typeid(fn)));
     // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
     PyTypeObject* type;
-    if (it == cpp_function_types.end()) {
+    if (it == cpp_function_types_map.end()) {
       type = &default_type.type;
     } else {
       type = (PyTypeObject*)it->second.get();
@@ -255,7 +256,9 @@ PyObject* functionToPyObject(const std::shared_ptr<Node>& cdata) {

 void registerCppFunction(const std::type_info& type, PyTypeObject* pytype) {
   Py_INCREF((PyObject*)pytype);
-  cpp_function_types[std::type_index(type)] = THPObjectPtr((PyObject*)pytype);
+  cpp_function_types_map[std::type_index(type)] =
+      THPObjectPtr((PyObject*)pytype);
+  cpp_function_types_set.insert(pytype);
 }

 PyObject* registerFunctionHook(Node& fn, PyObject* hook) {
@@ -317,5 +320,15 @@ PyObject* registerFunctionPreHook(Node& fn, PyObject* hook) {
   return handle;
 }

+bool THPCppFunction_Check(PyObject* obj) {
+  THPObjectPtr type = THPObjectPtr(PyObject_Type(obj));
+  if (cpp_function_types_set.find((PyTypeObject*)type.get()) ==
+      cpp_function_types_set.end()) {
+    return false;
+  } else {
+    return true;
+  }
+}
+
 } // namespace autograd
 } // namespace torch
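
For reference, the two kinds of grad_fn objects the checks recognize are visible from Python: codegen'd nodes get a per-type PyTypeObject registered into cpp_function_types_set, while custom autograd.Function nodes are THPFunction instances handled by THPFunction_Check. A small illustrative sketch (not part of this diff):

import torch
from torch.autograd import Function

class Double(Function):
    @staticmethod
    def forward(ctx, x):
        return x * 2

    @staticmethod
    def backward(ctx, gO):
        return gO * 2

x = torch.randn(2, requires_grad=True)
print(type(x.exp().grad_fn))          # codegen'd node, e.g. ExpBackward0
print(type(Double.apply(x).grad_fn))  # custom Function node (a THPFunction)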

torch/csrc/autograd/python_cpp_function.h

+2
@@ -96,5 +96,7 @@ PyTypeObject* createForwardFunctionPyTypeObject(
 void registerCppFunction(const std::type_info& type, PyTypeObject* pytype);
 PyObject* functionToPyObject(const std::shared_ptr<Node>& cdata);

+bool THPCppFunction_Check(PyObject* obj);
+
 } // namespace autograd
 } // namespace torch
