Skip to content

Commit

Permalink
benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
jlonge4 committed Jan 21, 2025
1 parent f4ff271 commit 0c87e29
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
version: '3.7'

services:
tgi-1:
image: neuronx-tgi:latest
ports:
- "8081:8081"
environment:
- PORT=8081
- MODEL_ID=${MODEL_ID}
- HF_AUTO_CAST_TYPE=${HF_AUTO_CAST_TYPE}
- HF_NUM_CORES=8
- MAX_BATCH_SIZE=${MAX_BATCH_SIZE}
- MAX_INPUT_TOKENS=${MAX_INPUT_TOKENS}
- MAX_TOTAL_TOKENS=${MAX_TOTAL_TOKENS}
- MAX_CONCURRENT_REQUESTS=512
- HF_TOKEN=${HF_TOKEN}
devices:
- "/dev/neuron0"
- "/dev/neuron1"
- "/dev/neuron2"
- "/dev/neuron3"

tgi-2:
image: neuronx-tgi:latest
ports:
- "8082:8082"
environment:
- PORT=8082
- MODEL_ID=${MODEL_ID}
- HF_AUTO_CAST_TYPE=${HF_AUTO_CAST_TYPE}
- HF_NUM_CORES=8
- MAX_BATCH_SIZE=${MAX_BATCH_SIZE}
- MAX_INPUT_TOKENS=${MAX_INPUT_TOKENS}
- MAX_TOTAL_TOKENS=${MAX_TOTAL_TOKENS}
- MAX_CONCURRENT_REQUESTS=512
- HF_TOKEN=${HF_TOKEN}
devices:
- "/dev/neuron4"
- "/dev/neuron5"
- "/dev/neuron6"
- "/dev/neuron7"

tgi-3:
image: neuronx-tgi:latest
ports:
- "8083:8083"
environment:
- PORT=8083
- MODEL_ID=${MODEL_ID}
- HF_AUTO_CAST_TYPE=${HF_AUTO_CAST_TYPE}
- HF_NUM_CORES=8
- MAX_BATCH_SIZE=${MAX_BATCH_SIZE}
- MAX_INPUT_TOKENS=${MAX_INPUT_TOKENS}
- MAX_TOTAL_TOKENS=${MAX_TOTAL_TOKENS}
- MAX_CONCURRENT_REQUESTS=512
- HF_TOKEN=${HF_TOKEN}
devices:
- "/dev/neuron8"
- "/dev/neuron9"
- "/dev/neuron10"
- "/dev/neuron11"

loadbalancer:
image: nginx:alpine
ports:
- "8080:80"
volumes:
- ./nginx.conf:/etc/nginx/nginx.conf:ro
depends_on:
- tgi-1
- tgi-2
- tgi-3
deploy:
placement:
constraints: [node.role == manager]
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
### Nginx TGI Load Balancer
events {}
http {
upstream tgicluster {
server tgi-1:8081;
server tgi-2:8082;
server tgi-3:8083;
}
server {
listen 80;
location / {
proxy_pass http://tgicluster;
}
}
}

0 comments on commit 0c87e29

Please sign in to comment.