[BUG] Fix some tiny bugs (#16)

SiqiLi-Fighting · web-flow · commit b602c044ac91 · 2025-09-28T16:13:20.000+08:00
* fix out_cache_loc dtype

* fix some dtype bugs and the simulate-acc bug
diff --git a/python/sgl_jax/srt/layers/logits_processor.py b/python/sgl_jax/srt/layers/logits_processor.py
@@ -297,11 +297,11 @@ def __call__(
             sample_indices = device_array(
                 np.array(
                     sample_indices,
-                    dtype=jnp.int64,
+                    dtype=np.int64,
                 ),
             )
             input_logprob_indices = device_array(
-                np.array(input_logprob_indices, dtype=jnp.int64),
+                np.array(input_logprob_indices, dtype=np.int64),
             )
 
         # Compute logits for both input and sampled tokens.
diff --git a/python/sgl_jax/srt/managers/schedule_batch.py b/python/sgl_jax/srt/managers/schedule_batch.py
@@ -1159,7 +1159,7 @@ def get_model_worker_batch(
             if positions_cpu is None:
                 # For decode: each sequence contributes one token at the next position (seq_len)
                 # Create positions for actual tokens (one per sequence at seq_len)
-                batch_positions = max(0, seq_lens_cpu - 1)
+                batch_positions = np.maximum(0, seq_lens_cpu - 1)
                 # Create positions array matching the length of input_ids (including padding)
                 positions_cpu = np.zeros(
                     len(input_ids_cpu), dtype=batch_positions.dtype
diff --git a/python/sgl_jax/srt/speculative/eagle_util.py b/python/sgl_jax/srt/speculative/eagle_util.py
@@ -347,7 +347,7 @@ def create_idle_input(
             verified_id=jnp.empty((0,), dtype=jnp.int32),
             hidden_states=jnp.empty((0, hidden_size), dtype=dtype),
             topk_p=jnp.empty((0, topk), dtype=jnp.float32),
-            topk_index=jnp.empty((0, topk), dtype=jnp.int64),
+            topk_index=jnp.empty((0, topk), dtype=jnp.int32),
             capture_hidden_mode=capture_hidden_mode,
             accept_length=jnp.empty((0,), dtype=jnp.int32),
             accept_length_cpu=[],
@@ -817,7 +817,7 @@ def verify(
             accept_length_cpu = accept_length.tolist()
             if len(unfinished_accept_index) > 0:
                 unfinished_accept_index = jnp.concatenate(unfinished_accept_index)
-                unfinished_index_device = jnp.array(unfinished_index, dtype=jnp.int64)
+                unfinished_index_device = jnp.array(unfinished_index, dtype=jnp.int32)
                 draft_input_accept_length_cpu = [
                     accept_length_cpu[i] for i in unfinished_index
                 ]
@@ -826,7 +826,7 @@ def verify(
                 else:
                     batch.out_cache_loc = jnp.empty(
                         len(unfinished_index) + sum(draft_input_accept_length_cpu),
-                        dtype=jnp.int64,
+                        dtype=jnp.int32,
                     )
                     accept_length_filter = create_accept_length_filter(
                         accept_length,
@@ -903,16 +903,16 @@ def _generate_simulated_accept_index(
             weight_upper = simulate_acc_len_float - lower
             weight_lower = 1.0 - weight_upper
             # here, data is on cpu
-            probs = numpy.array([weight_lower, weight_upper])
-            sampled_index = jax.random.multinomial(rng, probs, shape=(1,))
+            probs = jnp.array([weight_lower, weight_upper])
+            sampled_index = jax.random.categorical(rng, jnp.log(probs))
             simulate_acc_len = lower if sampled_index == 0 else upper
     else:
         raise ValueError(f"Invalid simulate_acc_method: {SIMULATE_ACC_METHOD}")
 
     accept_indx_first_col = accept_index[:, 0].reshape(-1, 1)
     sim_accept_index = jnp.full((bs, spec_steps + 1), -1, dtype=jnp.int32)
-    sim_accept_index[:, :simulate_acc_len] = accept_indx_first_col + jnp.arange(
-        simulate_acc_len
+    sim_accept_index = sim_accept_index.at[:, :simulate_acc_len].set(
+        accept_indx_first_col + jnp.arange(simulate_acc_len)
     )
     accept_length = accept_length.at[:].set(simulate_acc_len - 1)
     predict = predict.at[:].set(100)  # some legit token id

Original file line number	Diff line number	Diff line change
`@@ -297,11 +297,11 @@ def __call__(`
`297`	`297`	`sample_indices = device_array(`
`298`	`298`	`np.array(`
`299`	`299`	`sample_indices,`
`300`		`- dtype=jnp.int64,`
	`300`	`+ dtype=np.int64,`
`301`	`301`	`),`
`302`	`302`	`)`
`303`	`303`	`input_logprob_indices = device_array(`
`304`		`- np.array(input_logprob_indices, dtype=jnp.int64),`
	`304`	`+ np.array(input_logprob_indices, dtype=np.int64),`
`305`	`305`	`)`
`306`	`306`
`307`	`307`	`# Compute logits for both input and sampled tokens.`