@@ -1486,21 +1486,25 @@ def test_accuracy_depthwise2d(
14861486
14871487
# Test cases for index_put with accumulate=False.
# Each entry: (input_shape, indices_shape, values_shape, is_bool).
# When is_bool is True, indices_shape[0] is the shape of a single boolean
# mask and values_shape is ignored by the tests (values are sized from the
# mask's popcount at runtime).
INDEX_PUT_SHAPE_ACC_FALSE = (
    ((2**28,), ((2**16,),), (2**16,), False),
    ((32, 32), ((8,), (8,)), (8,), False),
    ((32, 32), ((8,), (2, 8)), (8,), False),
    ((32, 32), ((2, 8),), (32,), False),
    ((512, 512, 512), ((128,), (128,), (128,)), (128,), False),
    ((512, 512, 512), ((2, 128), (128,), (128,)), (128,), False),
    ((512, 512, 512), ((2, 128),), (512,), False),
    (
        (64, 64, 64),
        (
            (2, 8),
            (2, 8),
        ),
        (2, 8, 64),
        False,
    ),
    ((100,), ((100,),), (100,), True),
    ((32, 32), ((32, 32),), (32, 32), True),
    ((16, 16, 4), ((16, 16, 4),), (16, 16, 4), True),
)
15051509
15061510INDEX_ACC_SHAPE = (
@@ -1521,30 +1525,44 @@ def test_accuracy_depthwise2d(
15211525)
15221526
15231527
def gen_indices(input_shape, indices_shape, accumulate, is_bool):
    """Build the index list for an ``index_put`` test case.

    Args:
        input_shape: shape of the tensor being indexed; dimension ``i``
            bounds the values drawn for the ``i``-th integer index tensor.
        indices_shape: tuple of index-tensor shapes. When ``is_bool`` is
            True only ``indices_shape[0]`` is used, as the mask shape.
        accumulate: forwarded as ``replace`` to ``np.random.choice``.
            With ``accumulate=False`` the drawn indices are unique, so a
            non-accumulating ``index_put`` has no conflicting writes and
            its result is deterministic; with ``accumulate=True``
            duplicates are allowed (additions commute).
        is_bool: if True, generate a single boolean mask (boolean advanced
            indexing) instead of integer index tensors.

    Returns:
        A list of index tensors on the flag_gems device: either one bool
        mask, or one integer tensor per entry of ``indices_shape``.
    """
    if is_bool:
        # Boolean advanced indexing uses a single mask; elements where the
        # mask is True are selected.
        mask = torch.randint(
            0, 2, size=indices_shape[0], dtype=torch.bool, device=flag_gems.device
        )
        return [mask]

    indices = []
    for dim, shape in enumerate(indices_shape):
        # An int first argument to np.random.choice samples from arange(n)
        # without materializing it.
        chosen = np.random.choice(input_shape[dim], size=shape, replace=accumulate)
        indices.append(torch.tensor(chosen, device=flag_gems.device))
    return indices
15321544
15331545
15341546@pytest .mark .index_put
15351547@pytest .mark .parametrize (
1536- "input_shape, indices_shape, values_shape" , INDEX_PUT_SHAPE_ACC_FALSE
1548+ "input_shape, indices_shape, values_shape, is_bool " , INDEX_PUT_SHAPE_ACC_FALSE
15371549)
15381550@pytest .mark .parametrize ("dtype" , FLOAT_DTYPES )
1539- def test_index_put_acc_false (input_shape , indices_shape , values_shape , dtype ):
1551+ def test_index_put_acc_false (input_shape , indices_shape , values_shape , is_bool , dtype ):
15401552 accumulate = False
15411553 inp = torch .randn (
15421554 input_shape , dtype = dtype , device = flag_gems .device , requires_grad = False
15431555 )
1544- indices = gen_indices (input_shape , indices_shape , accumulate )
1545- values = torch .randn (
1546- values_shape , dtype = dtype , device = flag_gems .device , requires_grad = False
1547- )
1556+
1557+ indices = gen_indices (input_shape , indices_shape , accumulate , is_bool )
1558+
1559+ if is_bool :
1560+ K = indices [0 ].sum ().item ()
1561+ values = torch .randn ((K ,), dtype = dtype , device = flag_gems .device , requires_grad = False )
1562+ else :
1563+ values = torch .randn (
1564+ values_shape , dtype = dtype , device = flag_gems .device , requires_grad = False
1565+ )
15481566
15491567 ref_inp = to_reference (inp )
15501568 ref_indices = [to_reference (index ) for index in indices ]
@@ -1555,19 +1573,20 @@ def test_index_put_acc_false(input_shape, indices_shape, values_shape, dtype):
15551573
15561574
# Test cases for index_put with accumulate=True.
# Each entry: (input_shape, indices_shape, values_shape, is_bool).
# When is_bool is True, indices_shape[0] is the shape of a single boolean
# mask; the tests size the values tensor from the mask's popcount.
INDEX_PUT_SHAPE_ACC_TRUE = (
    ((2**28,), ((2**16,),), (2**16,), False),
    ((32, 32), ((8,), (8,)), (8,), False),
    ((512, 512, 512), ((128,), (128,), (128,)), (128,), False),
    ((64, 64, 64), ((2, 8), (2, 8), (2, 8)), (2, 8), False),
    ((32, 32), ((32, 32),), (32 * 32,), True),
)
15631582
15641583
15651584@pytest .mark .index_put
15661585@pytest .mark .parametrize (
1567- "input_shape, indices_shape, values_shape" , INDEX_PUT_SHAPE_ACC_TRUE
1586+ "input_shape, indices_shape, values_shape, is_bool " , INDEX_PUT_SHAPE_ACC_TRUE
15681587)
15691588@pytest .mark .parametrize ("dtype" , [torch .float16 , torch .float32 ])
1570- def test_index_put_acc_true (input_shape , indices_shape , values_shape , dtype ):
1589+ def test_index_put_acc_true (input_shape , indices_shape , values_shape , is_bool , dtype ):
15711590 init_seed (0 )
15721591 if flag_gems .vendor_name == "cambricon" :
15731592 torch .manual_seed (24 )
@@ -1576,10 +1595,16 @@ def test_index_put_acc_true(input_shape, indices_shape, values_shape, dtype):
15761595 inp = torch .randn (
15771596 input_shape , dtype = dtype , device = flag_gems .device , requires_grad = False
15781597 )
1579- indices = gen_indices (input_shape , indices_shape , accumulate )
1580- values = torch .randn (
1581- values_shape , dtype = dtype , device = flag_gems .device , requires_grad = False
1582- )
1598+
1599+ indices = gen_indices (input_shape , indices_shape , accumulate , is_bool )
1600+
1601+ if is_bool :
1602+ K = indices [0 ].sum ().item ()
1603+ values = torch .randn ((K ,), dtype = dtype , device = flag_gems .device , requires_grad = False )
1604+ else :
1605+ values = torch .randn (
1606+ values_shape , dtype = dtype , device = flag_gems .device , requires_grad = False
1607+ )
15831608
15841609 ref_inp = to_reference (inp , upcast = True )
15851610 ref_indices = [to_reference (index ) for index in indices ]
@@ -1591,18 +1616,24 @@ def test_index_put_acc_true(input_shape, indices_shape, values_shape, dtype):
15911616
15921617@pytest .mark .index_put_
15931618@pytest .mark .parametrize (
1594- "input_shape, indices_shape, values_shape" , INDEX_PUT_SHAPE_ACC_FALSE
1619+ "input_shape, indices_shape, values_shape, is_bool " , INDEX_PUT_SHAPE_ACC_FALSE
15951620)
15961621@pytest .mark .parametrize ("dtype" , FLOAT_DTYPES )
1597- def test_index_put__acc_false (input_shape , indices_shape , values_shape , dtype ):
1622+ def test_index_put__acc_false (input_shape , indices_shape , values_shape , is_bool , dtype ):
15981623 accumulate = False
15991624 inp = torch .randn (
16001625 input_shape , dtype = dtype , device = flag_gems .device , requires_grad = False
16011626 )
1602- indices = gen_indices (input_shape , indices_shape , accumulate )
1603- values = torch .randn (
1604- values_shape , dtype = dtype , device = flag_gems .device , requires_grad = False
1605- )
1627+
1628+ indices = gen_indices (input_shape , indices_shape , accumulate , is_bool )
1629+
1630+ if is_bool :
1631+ K = indices [0 ].sum ().item ()
1632+ values = torch .randn ((K ,), dtype = dtype , device = flag_gems .device , requires_grad = False )
1633+ else :
1634+ values = torch .randn (
1635+ values_shape , dtype = dtype , device = flag_gems .device , requires_grad = False
1636+ )
16061637
16071638 ref_inp = to_reference (inp )
16081639 ref_indices = [to_reference (index ) for index in indices ]
@@ -1614,10 +1645,10 @@ def test_index_put__acc_false(input_shape, indices_shape, values_shape, dtype):
16141645
16151646@pytest .mark .index_put_
16161647@pytest .mark .parametrize (
1617- "input_shape, indices_shape, values_shape" , INDEX_PUT_SHAPE_ACC_TRUE
1648+ "input_shape, indices_shape, values_shape, is_bool " , INDEX_PUT_SHAPE_ACC_TRUE
16181649)
16191650@pytest .mark .parametrize ("dtype" , [torch .float16 , torch .float32 ])
1620- def test_index_put__acc_true (input_shape , indices_shape , values_shape , dtype ):
1651+ def test_index_put__acc_true (input_shape , indices_shape , values_shape , is_bool , dtype ):
16211652 if flag_gems .vendor_name == "kunlunxin" :
16221653 torch .manual_seed (0 )
16231654 torch .cuda .manual_seed_all (0 )
@@ -1631,10 +1662,16 @@ def test_index_put__acc_true(input_shape, indices_shape, values_shape, dtype):
16311662 inp = torch .randn (
16321663 input_shape , dtype = dtype , device = flag_gems .device , requires_grad = False
16331664 )
1634- indices = gen_indices (input_shape , indices_shape , accumulate )
1635- values = torch .randn (
1636- values_shape , dtype = dtype , device = flag_gems .device , requires_grad = False
1637- )
1665+
1666+ indices = gen_indices (input_shape , indices_shape , accumulate , is_bool )
1667+
1668+ if is_bool :
1669+ K = indices [0 ].sum ().item ()
1670+ values = torch .randn ((K ,), dtype = dtype , device = flag_gems .device , requires_grad = False )
1671+ else :
1672+ values = torch .randn (
1673+ values_shape , dtype = dtype , device = flag_gems .device , requires_grad = False
1674+ )
16381675
16391676 ref_inp = to_reference (inp , upcast = True )
16401677 ref_indices = [to_reference (index ) for index in indices ]
0 commit comments