88 branches :
99 - main
1010 - " pull-request/[0-9]+"
11+ - release/*.*.*
1112
1213concurrency :
1314 group : ${{ github.workflow }}-build-test-${{ github.ref_name || github.run_id }}
3738 echo '${{ toJson(needs) }}' | jq -e 'to_entries | map(.value.result) | all(. as $result | ["success", "skipped"] | any($result == .))'
3839
3940 vllm :
40- runs-on : gpu-l40-amd64
4141 needs : changed-files
4242 if : needs.changed-files.outputs.has_code_changes == 'true'
# One build per target platform: each matrix entry pairs a CPU architecture
# with the runner label used for it. fail-fast is disabled so a failure on one
# platform does not cancel the other.
strategy:
  fail-fast: false
  matrix:
    platform:
      - arch: amd64
        runner: gpu-l40-amd64
      - arch: arm64
        runner: cpu-arm-r8g-4xlarge
name: vllm (${{ matrix.platform.arch }})
runs-on: ${{ matrix.platform.runner }}
4351 steps :
4452 - name : Checkout code
4553 uses : actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
@@ -49,22 +57,53 @@ jobs:
4957 with :
5058 framework : vllm
5159 target : runtime
60+ platform : ' linux/${{ matrix.platform.arch }}'
5261 ngc_ci_access_token : ${{ secrets.NGC_CI_ACCESS_TOKEN }}
5362 ci_token : ${{ secrets.CI_TOKEN }}
5463 aws_default_region : ${{ secrets.AWS_DEFAULT_REGION }}
5564 sccache_s3_bucket : ${{ secrets.SCCACHE_S3_BUCKET }}
5665 aws_access_key_id : ${{ secrets.AWS_ACCESS_KEY_ID }}
5766 aws_secret_access_key : ${{ secrets.AWS_SECRET_ACCESS_KEY }}
58- - name : Run tests
# Tag the image from the build-image step with a per-commit, per-architecture
# tag and hand it to the repo-local docker-tag-push action.
- name: Docker Tag and Push
  uses: ./.github/actions/docker-tag-push
  with:
    local_image: ${{ steps.build-image.outputs.image_tag }}
    push_tag: ai-dynamo/dynamo:${{ github.sha }}-vllm-${{ matrix.platform.arch }}
    # Push toggles are passed as exact 'true'/'false' strings with no padding.
    # NOTE(review): presumably the action compares them verbatim - confirm.
    # OPS-1145: Switch aws_push to true
    aws_push: 'false'
    azure_push: 'true'
    aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
    aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
    azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
    azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
    azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
# Unit tests for the vllm image; skipped on the arm64 leg of the matrix.
- name: Run unit tests
  if: matrix.platform.arch != 'arm64'
  uses: ./.github/actions/pytest
  with:
    pytest_marks: ' unit and vllm and gpu_1'
    image_tag: ${{ steps.build-image.outputs.image_tag }}
# End-to-end tests for the vllm image (tests marked slow are excluded);
# skipped on the arm64 leg of the matrix.
- name: Run e2e tests
  if: matrix.platform.arch != 'arm64'
  uses: ./.github/actions/pytest
  with:
    pytest_marks: ' e2e and vllm and gpu_1 and not slow'
    image_tag: ${{ steps.build-image.outputs.image_tag }}
6392
6493 sglang :
65- runs-on : gpu-l40-amd64
6694 needs : changed-files
6795 if : needs.changed-files.outputs.has_code_changes == 'true'
96+ # OPS-1140: Uncomment this for sglang arm switch to wideep
97+ # strategy:
98+ # fail-fast: false
99+ # matrix:
100+ # platform:
101+ # - { arch: amd64, runner: gpu-l40-amd64 }
102+ # - { arch: arm64, runner: cpu-arm-r8g-4xlarge }
103+ # name: sglang (${{ matrix.platform.arch }})
104+ # runs-on: ${{ matrix.platform.runner }}
105+ # OPS-1140: Remove this runs-on line, replaced with the above line
106+ runs-on : gpu-l40-amd64
68107 steps :
69108 - name : Checkout repository
70109 uses : actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
@@ -74,22 +113,49 @@ jobs:
74113 with :
75114 framework : sglang
76115 target : runtime
116+ platform : ' linux/amd64'
117+ # OPS-1140: Replace the above line with the uncommented below line
118+ # platform: 'linux/${{ matrix.platform.arch }}'
77119 ngc_ci_access_token : ${{ secrets.NGC_CI_ACCESS_TOKEN }}
78120 ci_token : ${{ secrets.CI_TOKEN }}
79121 aws_default_region : ${{ secrets.AWS_DEFAULT_REGION }}
80122 sccache_s3_bucket : ${{ secrets.SCCACHE_S3_BUCKET }}
81123 aws_access_key_id : ${{ secrets.AWS_ACCESS_KEY_ID }}
82124 aws_secret_access_key : ${{ secrets.AWS_SECRET_ACCESS_KEY }}
# Tag the image from the build-image step with a per-commit tag and hand it to
# the repo-local docker-tag-push action. The architecture suffix is hard-coded
# to amd64 until the sglang matrix is enabled (OPS-1140).
- name: Docker Tag and Push
  uses: ./.github/actions/docker-tag-push
  with:
    local_image: ${{ steps.build-image.outputs.image_tag }}
    push_tag: ai-dynamo/dynamo:${{ github.sha }}-sglang-amd64
    # OPS-1140: Replace the above line with the uncommented below line
    # push_tag: ai-dynamo/dynamo:${{ github.sha }}-sglang-${{ matrix.platform.arch }}
    # Push toggles are passed as exact 'true'/'false' strings with no padding.
    # NOTE(review): presumably the action compares them verbatim - confirm.
    # OPS-1145: Switch aws_push to true
    aws_push: 'false'
    azure_push: 'true'
    aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
    aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
    azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
    azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
    azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
# End-to-end tests for the sglang image. Currently unconditional because this
# job runs on a single amd64 runner.
- name: Run tests
  # OPS-1140: Uncomment the below line
  # if: ${{ matrix.platform.arch != 'arm64' }}
  uses: ./.github/actions/pytest
  with:
    pytest_marks: ' e2e and sglang and gpu_1'
    image_tag: ${{ steps.build-image.outputs.image_tag }}
88147
89148 trtllm :
90- runs-on : gpu-l40-amd64
91149 needs : changed-files
92150 if : needs.changed-files.outputs.has_code_changes == 'true'
# One build per target platform: each matrix entry pairs a CPU architecture
# with the runner label used for it. fail-fast is disabled so a failure on one
# platform does not cancel the other.
strategy:
  fail-fast: false
  matrix:
    platform:
      - arch: amd64
        runner: gpu-l40-amd64
      - arch: arm64
        runner: cpu-arm-r8g-4xlarge
name: trtllm (${{ matrix.platform.arch }})
runs-on: ${{ matrix.platform.runner }}
93159 steps :
94160 - name : Checkout code
95161 uses : actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
@@ -99,14 +165,59 @@ jobs:
99165 with :
100166 framework : trtllm
101167 target : runtime
168+ platform : ' linux/${{ matrix.platform.arch }}'
102169 ngc_ci_access_token : ${{ secrets.NGC_CI_ACCESS_TOKEN }}
103170 ci_token : ${{ secrets.CI_TOKEN }}
104171 aws_default_region : ${{ secrets.AWS_DEFAULT_REGION }}
105172 sccache_s3_bucket : ${{ secrets.SCCACHE_S3_BUCKET }}
106173 aws_access_key_id : ${{ secrets.AWS_ACCESS_KEY_ID }}
107174 aws_secret_access_key : ${{ secrets.AWS_SECRET_ACCESS_KEY }}
# Tag the image from the build-image step with a per-commit, per-architecture
# tag and hand it to the repo-local docker-tag-push action.
- name: Docker Tag and Push
  uses: ./.github/actions/docker-tag-push
  with:
    local_image: ${{ steps.build-image.outputs.image_tag }}
    push_tag: ai-dynamo/dynamo:${{ github.sha }}-trtllm-${{ matrix.platform.arch }}
    # Push toggles are passed as exact 'true'/'false' strings with no padding.
    # NOTE(review): presumably the action compares them verbatim - confirm.
    # OPS-1145: Switch aws_push to true
    aws_push: 'false'
    azure_push: 'true'
    aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
    aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
    azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
    azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
    azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
# End-to-end tests for the trtllm image (tests marked slow are excluded);
# skipped on the arm64 leg of the matrix.
- name: Run tests
  if: matrix.platform.arch != 'arm64'
  uses: ./.github/actions/pytest
  with:
    pytest_marks: ' e2e and trtllm_marker and gpu_1 and not slow'
    image_tag: ${{ steps.build-image.outputs.image_tag }}
194+
# Upload metrics for this workflow and all its jobs.
# Runs after backend-status-check (which waits for all build jobs) and uses
# always() so it still runs when earlier jobs have failed.
upload-workflow-metrics:
  name: Upload Workflow Metrics
  runs-on: gitlab
  if: always()
  needs: [backend-status-check]

  steps:
    - name: Check out repository
      # Pinned to the same commit SHA as the other checkouts in this workflow
      # rather than the floating v4 tag.
      uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0

    - name: Set up Python
      # NOTE(review): still a floating tag; consider pinning to a commit SHA
      # like the checkout action above.
      uses: actions/setup-python@v4
      with:
        # Quoted so the version spec stays a string; no padding inside quotes.
        python-version: '3.x'

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install requests

    - name: Upload Complete Workflow Metrics
      # The upload script is given its configuration through these variables.
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        WORKFLOW_INDEX: ${{ secrets.WORKFLOW_INDEX }}
        JOB_INDEX: ${{ secrets.JOB_INDEX }}
        STEPS_INDEX: ${{ secrets.STEPS_INDEX }}
      run: |
        # Run the enhanced metrics upload script
        python3 .github/workflows/upload_complete_workflow_metrics.py
0 commit comments