
Commit 16cf76b

Merge branch 'main' into codegen/rag_agents

2 parents 7b848c0 + 0392610

15 files changed: +124 −983 lines

.github/workflows/dockerhub-description.yml (+84 −951)

Large diffs are not rendered by default.

AvatarChatbot/tests/test_compose_on_gaudi.sh (+5 −4)

@@ -86,15 +86,16 @@ function start_services() {
   docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
   n=0
   until [[ "$n" -ge 200 ]]; do
-    docker logs tgi-gaudi-server > $LOG_PATH/tgi_service_start.log
-    if grep -q Connected $LOG_PATH/tgi_service_start.log; then
+    docker logs tgi-gaudi-server > $LOG_PATH/tgi_service_start.log && docker logs whisper-service 2>&1 | tee $LOG_PATH/whisper_service_start.log && docker logs speecht5-service 2>&1 | tee $LOG_PATH/speecht5_service_start.log
+    if grep -q Connected $LOG_PATH/tgi_service_start.log && grep -q running $LOG_PATH/whisper_service_start.log && grep -q running $LOG_PATH/speecht5_service_start.log; then
       break
     fi
-    sleep 5s
+    sleep 10s
     n=$((n+1))
   done
   echo "All services are up and running"
-  sleep 5s
+  # sleep 5s
+  sleep 1m
 }
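The loop above is a readiness gate: it re-reads each container's logs until a known ready marker appears, up to 200 attempts. A minimal standalone sketch of that pattern follows; `wait_for_log` is a hypothetical helper (not part of the repository), and the service names and markers are simply the ones this script checks.

```bash
#!/usr/bin/env bash
# Sketch of the readiness-polling pattern above.
# wait_for_log is an illustrative helper, not part of the repo.
wait_for_log() {
  local container=$1 pattern=$2 retries=${3:-200}
  local n=0
  until [[ "$n" -ge "$retries" ]]; do
    # Re-read the container's logs and look for the ready marker.
    if docker logs "$container" 2>&1 | grep -q "$pattern"; then
      return 0
    fi
    sleep 10s
    n=$((n+1))
  done
  echo "Timed out waiting for '$pattern' in $container logs" >&2
  return 1
}

# Mirrors the checks in start_services():
wait_for_log tgi-gaudi-server Connected &&
  wait_for_log whisper-service running &&
  wait_for_log speecht5-service running
```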

AvatarChatbot/tests/test_compose_on_xeon.sh (+6 −10)

@@ -85,15 +85,16 @@ function start_services() {
   # Start Docker Containers
   docker compose up -d
   n=0
-  until [[ "$n" -ge 100 ]]; do
-    docker logs tgi-service > $LOG_PATH/tgi_service_start.log
-    if grep -q Connected $LOG_PATH/tgi_service_start.log; then
+  until [[ "$n" -ge 200 ]]; do
+    docker logs tgi-service > $LOG_PATH/tgi_service_start.log && docker logs whisper-service 2>&1 | tee $LOG_PATH/whisper_service_start.log && docker logs speecht5-service 2>&1 | tee $LOG_PATH/speecht5_service_start.log
+    if grep -q Connected $LOG_PATH/tgi_service_start.log && grep -q running $LOG_PATH/whisper_service_start.log && grep -q running $LOG_PATH/speecht5_service_start.log; then
       break
     fi
-    sleep 5s
+    sleep 10s
     n=$((n+1))
   done
   echo "All services are up and running"
+  sleep 1m
 }

@@ -104,6 +105,7 @@ function validate_megaservice() {
   if [[ $result == *"mp4"* ]]; then
     echo "Result correct."
   else
+    echo "Result wrong, print docker logs."
     docker logs whisper-service > $LOG_PATH/whisper-service.log
     docker logs speecht5-service > $LOG_PATH/speecht5-service.log
     docker logs tgi-service > $LOG_PATH/tgi-service.log

@@ -117,19 +119,13 @@ function validate_megaservice() {
 }


-#function validate_frontend() {
-
-#}
-
-
 function stop_docker() {
   cd $WORKPATH/docker_compose/intel/cpu/xeon
   docker compose down
 }


 function main() {
-
   stop_docker
   if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
   start_services
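The new `else` branch follows the usual dump-logs-on-failure convention in these tests. A compact sketch of the same idea, assuming `$result` and `$LOG_PATH` are set as in the script (`dump_logs` is an illustrative helper, not in the repo):

```bash
# Illustrative: collect logs for each service when validation fails.
dump_logs() {
  local svc
  for svc in whisper-service speecht5-service tgi-service; do
    docker logs "$svc" > "$LOG_PATH/$svc.log" 2>&1
  done
}

if [[ $result == *"mp4"* ]]; then
  echo "Result correct."
else
  echo "Result wrong, print docker logs."
  dump_logs
  exit 1
fi
```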

ChatQnA/README.md (+8 −7)

@@ -15,13 +15,14 @@ RAG bridges the knowledge gap by dynamically fetching relevant information from

 ## 🤖 Automated Terraform Deployment using Intel® Optimized Cloud Modules for **Terraform**

-| Cloud Provider | Intel Architecture | Intel Optimized Cloud Module for Terraform | Comments |
-| -------------------- | --------------------------------- | ------------------------------------------ | -------- |
-| AWS | 4th Gen Intel Xeon with Intel AMX | [AWS Module](https://github.com/intel/terraform-intel-aws-vm/tree/main/examples/gen-ai-xeon-opea-chatqna) | Uses meta-llama/Meta-Llama-3-8B-Instruct by default |
-| AWS Falcon2-11B | 4th Gen Intel Xeon with Intel AMX | [AWS Module with Falcon11B](https://github.com/intel/terraform-intel-aws-vm/tree/main/examples/gen-ai-xeon-opea-chatqna-falcon11B) | Uses TII Falcon2-11B LLM Model |
-| GCP | 5th Gen Intel Xeon with Intel AMX | [GCP Module](https://github.com/intel/terraform-intel-gcp-vm/tree/main/examples/gen-ai-xeon-opea-chatqna) | Also supports Confidential AI by using Intel® TDX with 4th Gen Xeon |
-| Azure | 5th Gen Intel Xeon with Intel AMX | Work-in-progress | Work-in-progress |
-| Intel Tiber AI Cloud | 5th Gen Intel Xeon with Intel AMX | Work-in-progress | Work-in-progress |
+| Cloud Provider | Intel Architecture | Intel Optimized Cloud Module for Terraform | Comments |
+| -------------------- | ------------------------------------------------- | ------------------------------------------ | -------- |
+| AWS | 4th Gen Intel Xeon with Intel AMX | [AWS Deployment](https://github.com/intel/terraform-intel-aws-vm/tree/main/examples/gen-ai-xeon-opea-chatqna) | Uses meta-llama/Meta-Llama-3-8B-Instruct by default |
+| AWS Falcon2-11B | 4th Gen Intel Xeon with Intel AMX | [AWS Deployment with Falcon11B](https://github.com/intel/terraform-intel-aws-vm/tree/main/examples/gen-ai-xeon-opea-chatqna-falcon11B) | Uses TII Falcon2-11B LLM Model |
+| AWS Falcon3 | 4th Gen Intel Xeon with Intel AMX | [AWS Deployment with Falcon3](https://github.com/intel/terraform-intel-aws-vm/tree/main/examples/gen-ai-xeon-opea-chatqna-falcon3) | Uses TII Falcon3 LLM Model |
+| GCP | 4th/5th Gen Intel Xeon with Intel AMX & Intel TDX | [GCP Deployment](https://github.com/intel/terraform-intel-gcp-vm/tree/main/examples/gen-ai-xeon-opea-chatqna) | Supports Confidential AI by using Intel® TDX with 4th Gen Xeon |
+| Azure | 4th/5th Gen Intel Xeon with Intel AMX & Intel TDX | [Azure Deployment](https://github.com/intel/terraform-intel-azure-linux-vm/tree/main/examples/azure-gen-ai-xeon-opea-chatqna-tdx) | Supports Confidential AI by using Intel® TDX with 4th Gen Xeon |
+| Intel Tiber AI Cloud | 5th Gen Intel Xeon with Intel AMX | Work-in-progress | Work-in-progress |

 ## Automated Deployment to Ubuntu based system (if not using Terraform) using Intel® Optimized Cloud Modules for **Ansible**
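Each linked module is a self-contained Terraform example, so deploying one follows the standard Terraform workflow. A hedged sketch for the AWS ChatQnA entry in the table above (the required input variables depend on that example's own `variables.tf`; consult its README before applying):

```bash
# Sketch: standard Terraform workflow against the AWS ChatQnA example.
git clone https://github.com/intel/terraform-intel-aws-vm.git
cd terraform-intel-aws-vm/examples/gen-ai-xeon-opea-chatqna

terraform init    # fetch providers and modules
terraform plan    # review the resources to be created
terraform apply   # provision the Xeon VM and deploy ChatQnA
```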

ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml (+2 −1)

@@ -96,6 +96,7 @@ services:
       HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       LLM_MODEL_ID: ${LLM_MODEL_ID}
       VLLM_TORCH_PROFILER_DIR: "/mnt"
+      VLLM_CPU_KVCACHE_SPACE: 40
     healthcheck:
       test: ["CMD-SHELL", "curl -f http://$host_ip:9009/health || exit 1"]
       interval: 10s

@@ -124,7 +125,7 @@ services:
       - RERANK_SERVER_HOST_IP=tei-reranking-service
       - RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
       - LLM_SERVER_HOST_IP=vllm-service
-      - LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
+      - LLM_SERVER_PORT=80
       - LLM_MODEL=${LLM_MODEL_ID}
       - LOGFLAG=${LOGFLAG}
     ipc: host
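Two settings change here: the vLLM container gains `VLLM_CPU_KVCACHE_SPACE: 40`, which vLLM's CPU backend reads as the KV-cache budget in GiB, and `LLM_SERVER_PORT` is pinned to 80 instead of honoring an environment override — the same pinning recurs in every compose variant below. One way to confirm what the containers will actually receive is to render the resolved compose file; a sketch (run from the compose directory; the `docker exec` line assumes the container is reachable by its service name):

```bash
# Render the fully substituted compose file and inspect the two changed settings.
cd ChatQnA/docker_compose/intel/cpu/xeon
docker compose -f compose.yaml config | grep -E "VLLM_CPU_KVCACHE_SPACE|LLM_SERVER_PORT"

# After the stack is up, read the value from the running vLLM container
# (assumes the container shares the service's name, vllm-service):
docker exec vllm-service env | grep VLLM_CPU_KVCACHE_SPACE
```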

ChatQnA/docker_compose/intel/cpu/xeon/compose_milvus.yaml (+1 −1)

@@ -183,7 +183,7 @@ services:
       - RERANK_SERVER_HOST_IP=tei-reranking-service
       - RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
       - LLM_SERVER_HOST_IP=vllm-service
-      - LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
+      - LLM_SERVER_PORT=80
       - LLM_MODEL=${LLM_MODEL_ID}
       - LOGFLAG=${LOGFLAG}
     ipc: host

ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml (+1 −1)

@@ -107,7 +107,7 @@ services:
       - RERANK_SERVER_HOST_IP=tei-reranking-service
       - RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
       - LLM_SERVER_HOST_IP=vllm-service
-      - LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
+      - LLM_SERVER_PORT=80
       - LOGFLAG=${LOGFLAG}
       - LLM_MODEL=${LLM_MODEL_ID}
     ipc: host

ChatQnA/docker_compose/intel/cpu/xeon/compose_qdrant.yaml (+1 −1)

@@ -113,7 +113,7 @@ services:
       - RERANK_SERVER_HOST_IP=tei-reranking-service
       - RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
       - LLM_SERVER_HOST_IP=vllm-service
-      - LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
+      - LLM_SERVER_PORT=80
       - LLM_MODEL=${LLM_MODEL_ID}
       - LOGFLAG=${LOGFLAG}
     ipc: host

ChatQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml (+1 −1)

@@ -113,7 +113,7 @@ services:
       - RERANK_SERVER_HOST_IP=tei-reranking-service
       - RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
       - LLM_SERVER_HOST_IP=tgi-service
-      - LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
+      - LLM_SERVER_PORT=80
       - LLM_MODEL=${LLM_MODEL_ID}
       - LOGFLAG=${LOGFLAG}
     ipc: host

ChatQnA/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml (+1 −1)

@@ -94,7 +94,7 @@ services:
       - EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80}
       - RETRIEVER_SERVICE_HOST_IP=retriever
       - LLM_SERVER_HOST_IP=vllm-service
-      - LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
+      - LLM_SERVER_PORT=80
       - LLM_MODEL=${LLM_MODEL_ID}
       - LOGFLAG=${LOGFLAG}
       - CHATQNA_TYPE=${CHATQNA_TYPE:-CHATQNA_NO_RERANK}

ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml (+1 −1)

@@ -133,7 +133,7 @@ services:
       - RERANK_SERVER_HOST_IP=tei-reranking-service
       - RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
       - LLM_SERVER_HOST_IP=vllm-service
-      - LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
+      - LLM_SERVER_PORT=80
       - LLM_MODEL=${LLM_MODEL_ID}
       - LOGFLAG=${LOGFLAG}
     ipc: host

ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml (+1 −1)

@@ -166,7 +166,7 @@ services:
       - RERANK_SERVER_HOST_IP=tei-reranking-service
       - RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
       - LLM_SERVER_HOST_IP=vllm-service
-      - LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
+      - LLM_SERVER_PORT=80
       - LLM_MODEL=${LLM_MODEL_ID}
       - LOGFLAG=${LOGFLAG}
       - CHATQNA_TYPE=${CHATQNA_TYPE:-CHATQNA_GUARDRAILS}

ChatQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml (+1 −1)

@@ -127,7 +127,7 @@ services:
       - RERANK_SERVER_HOST_IP=tei-reranking-service
       - RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
       - LLM_SERVER_HOST_IP=tgi-service
-      - LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
+      - LLM_SERVER_PORT=80
       - LLM_MODEL=${LLM_MODEL_ID}
       - LOGFLAG=${LOGFLAG}
     ipc: host

ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml (+1 −1)

@@ -99,7 +99,7 @@ services:
       - EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80}
       - RETRIEVER_SERVICE_HOST_IP=retriever
       - LLM_SERVER_HOST_IP=vllm-service
-      - LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
+      - LLM_SERVER_PORT=80
       - LLM_MODEL=${LLM_MODEL_ID}
       - LOGFLAG=${LOGFLAG}
       - CHATQNA_TYPE=${CHATQNA_TYPE:-CHATQNA_NO_RERANK}

CodeGen/README.md (+10 −1)

@@ -89,7 +89,16 @@ flowchart LR
   DP <-.->VDB
 ```

-## Deploy CodeGen Service
+## 🤖 Automated Terraform Deployment using Intel® Optimized Cloud Modules for **Terraform**
+
+| Cloud Provider | Intel Architecture | Intel Optimized Cloud Module for Terraform | Comments |
+| -------------------- | --------------------------------- | ------------------------------------------ | -------- |
+| AWS | 4th Gen Intel Xeon with Intel AMX | [AWS Deployment](https://github.com/intel/terraform-intel-aws-vm/tree/main/examples/gen-ai-xeon-opea-codegen) | |
+| GCP | 4th/5th Gen Intel Xeon | [GCP Deployment](https://github.com/intel/terraform-intel-gcp-vm/tree/main/examples/gen-ai-xeon-opea-codegen) | |
+| Azure | 4th/5th Gen Intel Xeon | Work-in-progress | |
+| Intel Tiber AI Cloud | 5th Gen Intel Xeon with Intel AMX | Work-in-progress | |
+
+## Manual Deployment of CodeGen Service

 The CodeGen service can be effortlessly deployed on either Intel Gaudi2 or Intel Xeon Scalable Processor.
