@@ -4,13 +4,11 @@
 services:
   tgi-service:
     image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
-    container_name: tgi-server
-    profiles:
-      - codegen-xeon-tgi
+    container_name: tgi-service
     ports:
       - "8028:80"
     volumes:
-      - "${MODEL_CACHE:-./data}:/data"
+      - "./data:/data"
     shm_size: 1g
     environment:
       no_proxy: ${no_proxy}
@@ -24,78 +22,41 @@ services:
       timeout: 10s
       retries: 100
     command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
-  vllm-service:
-    image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
-    container_name: vllm-server
-    profiles:
-      - codegen-xeon-vllm
-    ports:
-      - "8028:80"
-    volumes:
-      - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
-    shm_size: 1g
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      host_ip: ${host_ip}
-    healthcheck:
-      test: ["CMD-SHELL", "curl -f http://$host_ip:8028/health || exit 1"]
-      interval: 10s
-      timeout: 10s
-      retries: 100
-    command: --model ${LLM_MODEL_ID} --host 0.0.0.0 --port 80
-  llm-base:
+  llm:
     image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}
     container_name: llm-textgen-server
+    depends_on:
+      tgi-service:
+        condition: service_healthy
+    ports:
+      - "9000:9000"
+    ipc: host
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      LLM_ENDPOINT: ${LLM_ENDPOINT}
+      LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
       LLM_MODEL_ID: ${LLM_MODEL_ID}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
     restart: unless-stopped
-  llm-tgi-service:
-    extends: llm-base
-    container_name: llm-codegen-tgi-server
-    profiles:
-      - codegen-xeon-tgi
-    ports:
-      - "9000:9000"
-    ipc: host
-    depends_on:
-      tgi-service:
-        condition: service_healthy
-  llm-vllm-service:
-    extends: llm-base
-    container_name: llm-codegen-vllm-server
-    profiles:
-      - codegen-xeon-vllm
-    ports:
-      - "9000:9000"
-    ipc: host
-    depends_on:
-      vllm-service:
-        condition: service_healthy
   codegen-xeon-backend-server:
     image: ${REGISTRY:-opea}/codegen:${TAG:-latest}
     container_name: codegen-xeon-backend-server
     depends_on:
-      - llm-base
+      - llm
     ports:
       - "7778:7778"
     environment:
       - no_proxy=${no_proxy}
       - https_proxy=${https_proxy}
       - http_proxy=${http_proxy}
-      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
-      - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
-      - RETRIEVAL_SERVICE_HOST_IP=${RETRIEVAL_SERVICE_HOST_IP}
+      - MEGA_SERVICE_HOST_IP=${host_ip} # ${MEGA_SERVICE_HOST_IP}
+      - LLM_SERVICE_HOST_IP=${host_ip} # ${LLM_SERVICE_HOST_IP}
+      # - RETRIEVAL_SERVICE_HOST_IP=${REDIS_RETRIEVER_PORT}
+      - RETRIEVAL_SERVICE_HOST_IP=${host_ip} # ${RETRIEVAL_SERVICE_HOST_IP}
       - REDIS_RETRIEVER_PORT=${REDIS_RETRIEVER_PORT}
-      - TEI_EMBEDDING_HOST_IP=${TEI_EMBEDDING_HOST_IP}
+      # - MM_EMBEDDING_SERVICE_HOST_IP=${MM_EMBEDDING_PORT_MICROSERVICE}
+      - TEI_EMBEDDING_HOST_IP=${host_ip} # ${TEI_EMBEDDING_HOST_IP}
       - EMBEDDER_PORT=${EMBEDDER_PORT}
 
     ipc: host
@@ -139,8 +100,7 @@ services:
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       LOGFLAG: true
     restart: unless-stopped
-
-
+
   tei-embedding-serving:
     image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
     container_name: tei-embedding-serving
@@ -204,7 +164,4 @@ services:
 
 networks:
   default:
-    driver: bridge
-
-
-
+    driver: bridge
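
For reference, a minimal smoke test of the edited stack might look like the following sketch. The ports (8028 for tgi-service, 9000 for the llm microservice, 7778 for codegen-xeon-backend-server) and the /health endpoint are taken from the diff above; the /v1/codegen request payload shape is an assumption based on the upstream OPEA CodeGen example, not something this PR defines.

    # Sketch of a smoke test for the edited compose stack (not part of this PR).
    docker compose up -d

    # Probe tgi-service readiness; this is the same URL the compose healthcheck curls.
    curl http://${host_ip}:8028/health

    # End-to-end request through the codegen-xeon-backend-server gateway.
    # The endpoint path and payload are assumed from the OPEA CodeGen example.
    curl http://${host_ip}:7778/v1/codegen \
      -H "Content-Type: application/json" \
      -d '{"messages": "Implement bubble sort in Python."}'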