Skip to content

Commit bb41a2e

Browse files
committed
doc(sglang): Add health check
1 parent 0621075 commit bb41a2e

1 file changed

Lines changed: 13 additions & 4 deletions

File tree

docs/USAGE.sglang.md

Lines changed: 13 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1269,11 +1269,14 @@ python \
12691269
- Tested at: 2025-10-08
12701270
- Tested version:
12711271
- `hip-attention`: `3192b974685791ab08f9278a4e23be4618a227fc`
1272-
- `sglang` ([DeepAuto-AI/sglang](https://github.com/DeepAuto-AI/sglang)): `eb1197fd7ad372de83a1589ec99c101054c25cf1`
1272+
- `sglang` ([DeepAuto-AI/sglang](https://github.com/DeepAuto-AI/sglang)): `a2e22f83f39645d13b40f663ddc7f9fb199f5d13`
12731273

12741274
#### Local
12751275

12761276
```bash
1277+
# Start
1278+
port=8000
1279+
12771280
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
12781281
BSA_K=32 \
12791282
BSA_EXACT_K=32 \
@@ -1296,7 +1299,7 @@ uv run \
12961299
--env-file .env \
12971300
-m sglang.launch_server \
12981301
--host 0.0.0.0 \
1299-
--port 8000 \
1302+
--port ${port} \
13001303
--model-path Qwen/Qwen3-235B-A22B-Instruct-2507-FP8 \
13011304
--kv-cache-dtype auto \
13021305
--ep-size 8 \
@@ -1324,6 +1327,7 @@ export $(grep -v '^#' .env | xargs)
13241327
# Start
13251328
name=deepauto-qwen3-235b-a22b-instruct-2507-fp8-8gpu
13261329
version=v1.2.9-sglang
1330+
port=8000
13271331

13281332
docker run \
13291333
--rm \
@@ -1335,8 +1339,13 @@ docker run \
13351339
--env "HF_HOME=/root/.cache/huggingface" \
13361340
--env "SGL_DG_CACHE_DIR=/root/.cache/deep_gemm" \
13371341
--env "TRITON_HOME=/root/.cache" \
1338-
-p 8000:8000 \
1342+
-p ${port}:${port} \
13391343
--ipc=host \
1344+
--health-cmd "curl -f http://localhost:${port}/health || exit 1" \
1345+
--health-interval 5s \
1346+
--health-timeout 60s \
1347+
--health-retries 1 \
1348+
--health-start-period 1800s \
13401349
--env "BSA_K=32" \
13411350
--env "BSA_EXACT_K=32" \
13421351
--env "BSA_BLOCK_K=64" \
@@ -1358,7 +1367,7 @@ deepauto/hip-attention:${version} \
13581367
python \
13591368
-m sglang.launch_server \
13601369
--host 0.0.0.0 \
1361-
--port 8000 \
1370+
--port ${port} \
13621371
--model-path Qwen/Qwen3-235B-A22B-Instruct-2507-FP8 \
13631372
--kv-cache-dtype auto \
13641373
--ep-size 8 \

0 commit comments

Comments (0)