@@ -4,13 +4,11 @@
 services:
   tgi-service:
     image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
-    container_name: tgi-server
-    profiles:
-      - codegen-xeon-tgi
+    container_name: tgi-service
     ports:
       - "8028:80"
     volumes:
-      - "${MODEL_CACHE:-./data}:/data"
+      - "./data:/data"
     shm_size: 1g
     environment:
       no_proxy: ${no_proxy}
@@ -24,78 +22,41 @@ services:
       timeout: 10s
       retries: 100
     command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
-  vllm-service:
-    image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
-    container_name: vllm-server
-    profiles:
-      - codegen-xeon-vllm
-    ports:
-      - "8028:80"
-    volumes:
-      - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
-    shm_size: 1g
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      host_ip: ${host_ip}
-    healthcheck:
-      test: ["CMD-SHELL", "curl -f http://$host_ip:8028/health || exit 1"]
-      interval: 10s
-      timeout: 10s
-      retries: 100
-    command: --model ${LLM_MODEL_ID} --host 0.0.0.0 --port 80
-  llm-base:
+  llm:
     image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}
     container_name: llm-textgen-server
+    depends_on:
+      tgi-service:
+        condition: service_healthy
+    ports:
+      - "9000:9000"
+    ipc: host
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      LLM_ENDPOINT: ${LLM_ENDPOINT}
+      LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
       LLM_MODEL_ID: ${LLM_MODEL_ID}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
     restart: unless-stopped
-  llm-tgi-service:
-    extends: llm-base
-    container_name: llm-codegen-tgi-server
-    profiles:
-      - codegen-xeon-tgi
-    ports:
-      - "9000:9000"
-    ipc: host
-    depends_on:
-      tgi-service:
-        condition: service_healthy
-  llm-vllm-service:
-    extends: llm-base
-    container_name: llm-codegen-vllm-server
-    profiles:
-      - codegen-xeon-vllm
-    ports:
-      - "9000:9000"
-    ipc: host
-    depends_on:
-      vllm-service:
-        condition: service_healthy
   codegen-xeon-backend-server:
     image: ${REGISTRY:-opea}/codegen:${TAG:-latest}
     container_name: codegen-xeon-backend-server
     depends_on:
-      - llm-base
+      - llm
     ports:
       - "7778:7778"
     environment:
       - no_proxy=${no_proxy}
       - https_proxy=${https_proxy}
       - http_proxy=${http_proxy}
-      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
-      - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
-      - RETRIEVAL_SERVICE_HOST_IP=${RETRIEVAL_SERVICE_HOST_IP}
+      - MEGA_SERVICE_HOST_IP=${host_ip} # ${MEGA_SERVICE_HOST_IP}
+      - LLM_SERVICE_HOST_IP=${host_ip} # ${LLM_SERVICE_HOST_IP}
+      # - RETRIEVAL_SERVICE_HOST_IP=${REDIS_RETRIEVER_PORT}
+      - RETRIEVAL_SERVICE_HOST_IP=${host_ip} # ${RETRIEVAL_SERVICE_HOST_IP}
       - REDIS_RETRIEVER_PORT=${REDIS_RETRIEVER_PORT}
-      - TEI_EMBEDDING_HOST_IP=${TEI_EMBEDDING_HOST_IP}
+      # - MM_EMBEDDING_SERVICE_HOST_IP=${MM_EMBEDDING_PORT_MICROSERVICE}
+      - TEI_EMBEDDING_HOST_IP=${host_ip} # ${TEI_EMBEDDING_HOST_IP}
       - EMBEDDER_PORT=${EMBEDDER_PORT}
 
     ipc: host
@@ -139,8 +100,7 @@ services:
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       LOGFLAG: true
     restart: unless-stopped
-
-
+
   tei-embedding-serving:
     image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
     container_name: tei-embedding-serving
@@ -204,7 +164,4 @@ services:
 
 networks:
   default:
-    driver: bridge
-
-
-
+    driver: bridge
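
For reference, a minimal smoke test of the edited stack might look like the following sketch. The ports (8028 for tgi-service, 9000 for the llm microservice, 7778 for codegen-xeon-backend-server) and the /health endpoint are taken from the diff above; the /v1/codegen request payload shape is an assumption based on the upstream OPEA CodeGen example, not something this PR defines.

    # Sketch of a smoke test for the edited compose stack (not part of this PR).
    docker compose up -d

    # Probe tgi-service readiness; this is the same URL the compose healthcheck curls.
    curl http://${host_ip}:8028/health

    # End-to-end request through the codegen-xeon-backend-server gateway.
    # The endpoint path and payload are assumed from the OPEA CodeGen example.
    curl http://${host_ip}:7778/v1/codegen \
      -H "Content-Type: application/json" \
      -d '{"messages": "Implement bubble sort in Python."}'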