
Commit 85319a1

fix tests
Signed-off-by: richardhuo-nv <[email protected]>
1 parent: b17fba7

1 file changed: 3 additions, 3 deletions


tests/kvbm/test_determinism.py

@@ -24,7 +24,7 @@
 from datetime import datetime
 from enum import Enum
 from pathlib import Path
-from typing import Dict, List, Optional, TextIO, Tuple
+from typing import Any, Dict, List, Optional, TextIO, Tuple
 
 import pytest
 import requests
@@ -128,7 +128,7 @@ def _set_up_trtllm_config(self, gpu_cache_blocks):
         config_path = os.environ.get(
             "KVBM_TRTLLM_LLMAPI_CONFIG_PATH", "/tmp/kvbm_llm_api_config.yaml"
         )
-        llm_api_config = {}
+        llm_api_config: dict[str, Any] = {}
         llm_api_config[
             "cuda_graph_config"
         ] = None  # explicitly disable CUDA graph since Connector API doesn't support CUDA graph yet in TRTLLM
@@ -146,7 +146,7 @@ def _set_up_trtllm_config(self, gpu_cache_blocks):
         if gpu_cache_blocks is not None:
             del llm_api_config["kv_cache_config"]["free_gpu_memory_fraction"]
             llm_api_config["kv_cache_config"]["max_tokens"] = (
-                gpu_cache_blocks * 32
+                int(gpu_cache_blocks) * 32
             )  # TRTLLM defaults 32 tokens per block
 
         # Construct serve command
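Both changes read as type-related test fixes: annotating the config dict as dict[str, Any] lets it hold heterogeneous values (None, a nested dict) without the type checker inferring a narrower value type, and int(gpu_cache_blocks) guards against the block count arriving as a string, in which case "4096" * 32 would repeat the string rather than multiply. Below is a minimal sketch of the resulting pattern as a standalone helper; the function name, the 0.9 memory fraction, and the example value are illustrative, not taken from the test file.

from typing import Any, Optional, Union


def build_llm_api_config(gpu_cache_blocks: Optional[Union[int, str]]) -> dict[str, Any]:
    # Annotate up front so the mixed value types below (None, a nested
    # dict, an int) all satisfy the type checker.
    llm_api_config: dict[str, Any] = {}
    llm_api_config["cuda_graph_config"] = None  # Connector API doesn't support CUDA graph yet in TRTLLM
    llm_api_config["kv_cache_config"] = {"free_gpu_memory_fraction": 0.9}  # illustrative default

    if gpu_cache_blocks is not None:
        del llm_api_config["kv_cache_config"]["free_gpu_memory_fraction"]
        # Cast before multiplying: a value sourced from an environment
        # variable or CLI parameter is a str, and "4" * 32 would produce
        # a 128-character string. TRTLLM defaults to 32 tokens per block.
        llm_api_config["kv_cache_config"]["max_tokens"] = int(gpu_cache_blocks) * 32

    return llm_api_config


# e.g. build_llm_api_config("4096")["kv_cache_config"]["max_tokens"] == 131072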
