Commit cd01127

[Op] Fix nll_loss (awslabs#113)
* fix
* test
* address comments
1 parent 650175b commit cd01127

2 files changed: +18 −9 lines changed

python/raf/_tvm_op/loss.py (+7 −4)

@@ -83,13 +83,16 @@ def smooth_l1_loss_dtrue_compute(attr, inputs, output_type):  # pylint: disable=
 def nll_loss_compute(attrs, inputs, output_type):  # pylint: disable=unused-argument
     true, pred = inputs
     n, c = pred.shape
+    dtype = pred.dtype
+    if dtype == "float16":
+        pred = pred.astype("float32")

     if true.ndim == 1:  # one-host label encoding

         def fcompute_one_hot(i):  # pylint: disable=unused-argument
             return -pred[i, true[i]] / n

-        loss = _tvm.te.compute((n,), fcompute_one_hot)
+        loss = _tvm.te.compute((n,), fcompute_one_hot, tag=_tvm.topi.tag.INJECTIVE)
         loss = _topi.sum(loss, axis=[0], keepdims=True)
     else:  # sparse label encoding

@@ -98,12 +101,12 @@ def fcompute_sparse(x):  # pylint: disable=unused-argument
             redc = _tvm.te.reduce_axis((0, c), name="rc")
             return _tvm.te.sum(-pred[redn, redc] * true[redn, redc] / n, axis=[redc, redn])

-        loss = _tvm.te.compute((1,), fcompute_sparse)
+        loss = _tvm.te.compute((1,), fcompute_sparse, tag=_tvm.topi.tag.COMM_REDUCE)

-    return [loss]
+    return [loss.astype(dtype)]


-_reg.register_injective_schedule("raf.op.tvm.nll_loss")
+_reg.register_reduce_schedule("raf.op.tvm.nll_loss")


 @register_compute("raf.op.tvm.nll_loss_dpred")
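For context, a minimal NumPy sketch of what nll_loss_compute produces under the two label encodings (mean reduction over the batch, matching the "/ n" in the TE expressions above). The function name and shapes are illustrative and not part of this commit; it assumes pred already holds log-probabilities.

import numpy as np

def nll_loss_reference(true, pred):
    """Illustrative reference for the TE compute above (not part of the commit)."""
    n, _ = pred.shape
    if true.ndim == 1:  # integer class indices of shape (n,)
        # Pick each sample's target-class log-probability, negate, and average.
        return np.array([-pred[np.arange(n), true].sum() / n])
    # Dense (n, c) label encoding: weight every entry by its label value.
    return np.array([-(pred * true).sum() / n])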

tests/python/op/tvm/test_tvm_loss.py (+11 −5)

@@ -57,8 +57,12 @@ def forward(self, y_true, y_pred):  # pylint: disable=no-self-use
 @pytest.mark.parametrize("device", get_testable_devices())
 @pytest.mark.parametrize("n", [3, 7])
 @pytest.mark.parametrize("c", [2, 6])
+@pytest.mark.parametrize("dtype", ["float32", "float16"])
 @pytest.mark.parametrize("one_hot_label", [True, False])
-def test_nll_loss(device, n, c, one_hot_label):
+def test_nll_loss(device, n, c, dtype, one_hot_label):
+    if device == "cpu" and dtype == "float16":
+        pytest.skip("PyTorch nll_loss does not support float16 when using CPU.")
+
     class TestModel(raf.Model):
         def build(self):
             pass

@@ -68,10 +72,10 @@ def forward(self, y_true, y_pred):  # pylint: disable=no-self-use
             return raf.nll_loss(y_true=y_true, y_pred=y_pred)

     model = TestModel()
-    m_pred, t_pred = randn_torch((n, c), device=device, requires_grad=True)
+    m_pred, t_pred = randn_torch((n, c), dtype=dtype, device=device, requires_grad=True)
     m_true, np_true = randint((n,), low=0, high=c, device=device, dtype="int64")
     if not one_hot_label:
-        m_true = np.zeros((n, c), dtype="float32")
+        m_true = np.zeros((n, c), dtype=dtype)
         for i in range(n):
             m_true[i, np_true[i]] = 1
         m_true = raf.array(m_true, device=device)

@@ -83,10 +87,12 @@ def forward(self, y_true, y_pred):  # pylint: disable=no-self-use
     check(m_loss, t_loss)
     check(v_loss, t_loss)
     # backward
-    m_dy, t_dy = randn_torch((), device=device)
+    m_dy, t_dy = randn_torch((), device=device, dtype=dtype)
     t_loss.backward(t_dy)
     m_loss.backward(m_dy)
-    check(m_pred.grad, t_pred.grad)
+    rtol = 1e-5 if dtype == "float32" else 1e-3
+    atol = 1e-5 if dtype == "float32" else 1e-3
+    check(m_pred.grad, t_pred.grad, rtol=rtol, atol=atol)


 @pytest.mark.parametrize("device", ["cpu"])
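As a rough cross-check of the new float16 path, the sketch below runs PyTorch's nll_loss in half precision on GPU and compares it against a float32 recomputation with the same relaxed 1e-3 tolerances used above. It assumes a CUDA device is available (the test skips float16 on CPU) and is illustrative only, not part of this commit.

import torch
import torch.nn.functional as F

# Half-precision "log-probabilities" and integer targets on GPU.
pred = torch.randn(4, 6, dtype=torch.float16, device="cuda", requires_grad=True)
true = torch.randint(0, 6, (4,), device="cuda")

loss = F.nll_loss(pred, true)  # forward in float16
loss.backward()                # gradient w.r.t. pred

# Compare against a float32 recomputation using the relaxed half-precision tolerances.
ref = F.nll_loss(pred.detach().float(), true)
torch.testing.assert_close(loss.float(), ref, rtol=1e-3, atol=1e-3)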
