Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 11 additions & 11 deletions buildkite/test-template-ci.j2
Original file line number Diff line number Diff line change
Expand Up @@ -471,24 +471,24 @@ steps:

{% for step in steps %}
{% if step.mirror_hardwares and mirror_hw in step.mirror_hardwares %}
- label: "AMD MI300: {{ step.label }}"
- label: "{{ step.agent_pool }}: {{ step.label }}"
depends_on: amd-build
agents:
{% if step.label and step.label=="Benchmarks" or step.label=="Kernels Attention Test %N" or step.label=="LoRA Test %N" or step.label=="Kernels Quantization Test %N" %}
queue: amd_mi325_8
{% elif step.label=="Distributed Tests (4 GPUs)" or step.label=="2 Node Tests (4 GPUs in total)" or step.label=="Multi-step Tests (4 GPUs)" or step.label=="Pipeline Parallelism Test" or step.label=="LoRA TP Test (Distributed)" %}
queue: amd_mi325_4
{% elif step.label=="Distributed Comm Ops Test" or step.label=="Distributed Tests (2 GPUs)" or step.label=="Plugin Tests (2 GPUs)" or step.label=="Weight Loading Multiple GPU Test" or step.label=="Weight Loading Multiple GPU Test - Large Models" %}
queue: amd_mi325_2
{% else %}
queue: amd_mi325_1
{% endif%}
{% if step.agent_pool %}
queue: {{ step.agent_pool }}
{% else %}
queue: amd_mi325_1
{% endif %}
command: bash .buildkite/scripts/hardware_ci/run-amd-test.sh "(command rocm-smi || true) && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd {{ (step.working_dir or default_working_dir) | safe }} ; {{ step.command or (step.commands | join(" && ")) | safe }}"
env:
DOCKER_BUILDKIT: "1"
priority: 100
{% if step.label and step.label=="Regression Test" or step.label=="Engine Test" %}
soft_fail: false
{% endif %}
{% else %}
soft_fail: true
{% endif%}
{% endif %}
{% endfor %}
{% for step in steps %}
# removed because of lack of HW resources: step.label and step.label=="Benchmarks" or step.label=="Pipeline Parallelism Test" or
Expand Down
12 changes: 8 additions & 4 deletions buildkite/test-template-fastcheck.j2
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ steps:
steps:
- label: "AMD: :docker: build image with {{mirror_hw}}"
depends_on: ~
soft_fail: true
soft_fail: false
commands:
- "docker build --build-arg max_jobs=16 --build-arg REMOTE_VLLM=1 --build-arg ARG_PYTORCH_ROCM_ARCH='gfx90a;gfx942' --build-arg VLLM_BRANCH=$BUILDKITE_COMMIT --tag {{ docker_image_amd }} -f docker/Dockerfile.rocm --target test --no-cache --progress plain ."
- "docker push {{ docker_image_amd }}"
Expand All @@ -346,14 +346,18 @@ steps:
{% for step in steps %}
{% if step.mirror_hardwares and mirror_hw in step.mirror_hardwares %}
{% if step.label and step.label=="Basic Correctness Test" %}
- block: "Run AMD MI300: {{ step.label }} with {{mirror_hw}}"
- block: "{{ step.agent_pool }}: {{ step.label }}"
key: block-amd-{{ step.label | replace(" ", "-") | lower | replace("(", "") | replace(")", "") | replace("%", "") | replace(",", "-") | replace("+", "-") }}
depends_on: amd-build

- label: "AMD MI300: {{ step.label }} with {{mirror_hw}}"
- label: "{{ step.agent_pool }}: {{ step.label }}"
depends_on: block-amd-{{ step.label | replace(" ", "-") | lower | replace("(", "") | replace(")", "") | replace("%", "") | replace(",", "-") | replace("+", "-") }}
agents:
queue: amd_mi300_1
{% if step.agent_pool %}
queue: {{ step.agent_pool }}
{% else %}
queue: amd_mi325_1
{% endif %}
command: bash .buildkite/scripts/hardware_ci/run-amd-test.sh "(command rocm-smi || true) && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd {{ (step.working_dir or default_working_dir) | safe }} ; {{ step.command or (step.commands | join(" && ")) | safe }}"
env:
DOCKER_BUILDKIT: "1"
Expand Down