# SPDX-License-Identifier: Apache-2.0

services:
+
  tgi-service:
    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
-    container_name: tgi-service
+    container_name: tgi-server
+    # profiles:
+    #   - codegen-xeon-tgi
    ports:
      - "8028:80"
    volumes:
-      - "./data:/data"
+      - "${MODEL_CACHE:-./data}:/data"
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
@@ -22,46 +25,82 @@ services:
      timeout: 10s
      retries: 100
    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
+
+  vllm-service:
+    image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
+    container_name: vllm-server
+    profiles:
+      - codegen-xeon-vllm
+    ports:
+      - "8028:80"
+    volumes:
+      - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
+    shm_size: 1g
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      host_ip: ${host_ip}
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:8028/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
+    command: --model ${LLM_MODEL_ID} --host 0.0.0.0 --port 80

-  llm:
+  llm-base:
    image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}
    container_name: llm-textgen-server
-    depends_on:
-      tgi-service:
-        condition: service_healthy
-    ports:
-      - "9000:9000"
-    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
+      LLM_ENDPOINT: ${LLM_ENDPOINT}
      LLM_MODEL_ID: ${LLM_MODEL_ID}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
    restart: unless-stopped
-
+
+  llm-tgi-service:
+    extends: llm-base
+    container_name: llm-codegen-tgi-server
+    # profiles:
+    #   - codegen-xeon-tgi
+    ports:
+      - "9000:9000"
+    ipc: host
+    depends_on:
+      tgi-service:
+        condition: service_healthy
+  llm-vllm-service:
+    extends: llm-base
+    container_name: llm-codegen-vllm-server
+    profiles:
+      - codegen-xeon-vllm
+    ports:
+      - "9000:9000"
+    ipc: host
+    depends_on:
+      vllm-service:
+        condition: service_healthy

  codegen-xeon-backend-server:
    image: ${REGISTRY:-opea}/codegen:${TAG:-latest}
    container_name: codegen-xeon-backend-server
    depends_on:
-      - llm
+      - llm-base
    ports:
      - "7778:7778"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
-      - MEGA_SERVICE_HOST_IP=${host_ip} # ${MEGA_SERVICE_HOST_IP}
-      - LLM_SERVICE_HOST_IP=${host_ip} # ${LLM_SERVICE_HOST_IP}
-      # - RETRIEVAL_SERVICE_HOST_IP=${REDIS_RETRIEVER_PORT}
-      - RETRIEVAL_SERVICE_HOST_IP=${host_ip} # ${RETRIEVAL_SERVICE_HOST_IP}
+      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
+      - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
+      - RETRIEVAL_SERVICE_HOST_IP=${RETRIEVAL_SERVICE_HOST_IP}
      - REDIS_RETRIEVER_PORT=${REDIS_RETRIEVER_PORT}
-      # - MM_EMBEDDING_SERVICE_HOST_IP=${MM_EMBEDDING_PORT_MICROSERVICE}
-      - TEI_EMBEDDING_HOST_IP=${host_ip} # ${TEI_EMBEDDING_HOST_IP}
+      - TEI_EMBEDDING_HOST_IP=${TEI_EMBEDDING_HOST_IP}
      - EMBEDDER_PORT=${EMBEDDER_PORT}
-
    ipc: host
    restart: always
  codegen-xeon-ui-server:
@@ -85,7 +124,7 @@ services:
    ports:
      - "${REDIS_DB_PORT}:${REDIS_DB_PORT}"
      - "${REDIS_INSIGHTS_PORT}:${REDIS_INSIGHTS_PORT}"
-
+
  dataprep-redis-server:
    image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
    container_name: dataprep-redis-server
@@ -165,6 +204,8 @@ services:
      RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS}
    restart: unless-stopped

+
+
networks:
  default:
-    driver: bridge
+    driver: bridge
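
Usage note (not part of the diff): compose services without a profiles key always start, so with the codegen-xeon-tgi profile still commented out the TGI path is the default, while the vLLM path is opt-in via the standard docker compose --profile flag. Assuming the merged file is saved as compose.yaml and the referenced variables (LLM_MODEL_ID, HUGGINGFACEHUB_API_TOKEN, LLM_ENDPOINT, host_ip) are exported, selecting each path would look roughly like this:

# default path: tgi-service + llm-tgi-service (no profile needed)
docker compose up -d

# opt-in path: also start vllm-service and llm-vllm-service
docker compose --profile codegen-xeon-vllm up -d

One caveat: as written, tgi-service starts unconditionally, so enabling the vLLM profile would leave both inference servers contending for host port 8028 until the commented codegen-xeon-tgi profile lines are restored.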