Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
033aa6e
init
Oseltamivir Feb 20, 2026
c177baa
add mat
Oseltamivir Feb 20, 2026
6988322
Increase Eval Conc
Oseltamivir Feb 20, 2026
c0d008b
8k1k evals instead of 1k1k
Oseltamivir Feb 20, 2026
d73bf3d
reduce conc
Oseltamivir Feb 20, 2026
1a6e76c
Merge branch 'main' into multinode_eval
Oseltamivir Feb 25, 2026
e63dbf4
Merge branch 'main' into multinode_eval
Oseltamivir Feb 25, 2026
ab179c7
Eval table missing spec decode
Oseltamivir Feb 25, 2026
d965a51
Merge branch 'main' into multinode_eval
Oseltamivir Mar 1, 2026
22a341c
Merge branch 'main' into multinode_eval
Oseltamivir Mar 11, 2026
8662931
fix: force-reinstall pinned lm-eval to override Docker image version
Oseltamivir Mar 11, 2026
d44f10d
add fp8 disagg no-DPA eval config to isolate DPA as variable
Oseltamivir Mar 11, 2026
0c6f500
Merge branch 'main' into multinode_eval
Oseltamivir Mar 13, 2026
e5c63dc
nvda evals
Oseltamivir Mar 26, 2026
94864f9
Merge branch 'main' into multinode_eval
Oseltamivir Mar 27, 2026
7215f1f
merge main
Oseltamivir Mar 30, 2026
df91368
Merge branch 'main' into multinode_eval
Oseltamivir Mar 30, 2026
8d26331
update multinode to singlenode
Oseltamivir Mar 31, 2026
0b27187
hanging rm rf
Oseltamivir Mar 31, 2026
056a415
debug
Oseltamivir Mar 31, 2026
61f7d9b
update conc req
Oseltamivir Mar 31, 2026
ffdd49b
documentation
Oseltamivir Apr 1, 2026
7639f3d
median instead of max
Oseltamivir Apr 1, 2026
4ffd505
config file guard
Oseltamivir Apr 1, 2026
0d0e1e8
h100/h200/b200/b300 evals
Oseltamivir Apr 1, 2026
bf615b9
Update repo
Oseltamivir Apr 1, 2026
28a75a2
models_name
Oseltamivir Apr 1, 2026
98a45e9
model config
Oseltamivir Apr 2, 2026
de54974
summary table
Oseltamivir Apr 2, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 34 additions & 2 deletions .github/configs/amd-master.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,38 @@ dsr1-fp8-mi355x-atom-mtp:
search-space:
- { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp }

# Eval-only: fp8 disagg WITHOUT DPA — isolates DPA as variable.
# Both the prefill and decode pools set dp-attn to false, and speculative
# decoding is off (spec-decoding "none", DECODE_MTP_SIZE=0).
dsr1-fp8-mi355x-sglang-disagg-nodpa-eval:
  image: rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2
  model: deepseek-ai/DeepSeek-R1-0528
  model-prefix: dsr1
  runner: mi355x-disagg
  precision: fp8
  framework: sglang-disagg
  multinode: true
  disagg: true
  seq-len-configs:
    # Single 8k-in / 1k-out shape.
    - isl: 8192
      osl: 1024
      search-space:
        # One point only: a single concurrency, no sweep range.
        - spec-decoding: "none"
          conc-list: [256]
          # Prefill pool: one worker, TP8.
          prefill:
            num-worker: 1
            tp: 8
            ep: 1
            dp-attn: false
            additional-settings:
              - "PREFILL_NODES=1"
          # Decode pool: two workers, TP8 each.
          decode:
            num-worker: 2
            tp: 8
            ep: 1
            dp-attn: false
            additional-settings:
              - "DECODE_NODES=2"
              - "DECODE_MTP_SIZE=0"


dsr1-fp8-mi355x-sglang-disagg:
image: rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2
model: deepseek-ai/DeepSeek-R1-0528
Expand Down Expand Up @@ -814,7 +846,7 @@ dsr1-fp8-mi355x-sglang-disagg-mtp:


dsr1-fp4-mi355x-sglang-disagg:
image: rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-3
image: rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0313-2
model: amd/DeepSeek-R1-0528-MXFP4
model-prefix: dsr1
runner: mi355x-disagg
Expand Down Expand Up @@ -1022,7 +1054,7 @@ dsr1-fp4-mi355x-sglang-disagg:
- "DECODE_MTP_SIZE=0"

dsr1-fp4-mi355x-sglang-disagg-mtp:
image: rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-3
image: rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0313-2
model: amd/DeepSeek-R1-0528-MXFP4
model-prefix: dsr1
runner: mi355x-disagg
Expand Down
72 changes: 65 additions & 7 deletions .github/workflows/benchmark-multinode-tmpl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,20 @@ on:
required: false
type: string
default: "[]"
# Eval controls for the reusable multi-node template. All three are optional
# and default to "no eval", so existing callers keep their behavior.
run-eval:
  description: "Enable the eval run for this job"
  type: boolean
  required: false
  default: false
eval-only:
  description: "Run only evals (skip throughput benchmark)"
  type: boolean
  required: false
  default: false
# Passed as a string so an unset value can be represented as "".
eval-conc:
  description: "Concurrency to use for eval requests (overrides default max-of-conc-list)"
  type: string
  required: false
  default: ""
ref:
description: "Git ref (branch/sha) to checkout"
required: false
Expand All @@ -96,6 +110,9 @@ env:
CONC_LIST: ${{ join(fromJson(inputs.conc-list), ' ') }}
SPEC_DECODING: ${{ inputs.spec-decoding }}
DISAGG: ${{ inputs.disagg }}
RUN_EVAL: ${{ inputs.run-eval }}
EVAL_ONLY: ${{ inputs.eval-only }}
EVAL_CONC: ${{ inputs.eval-conc }}
PYTHONDONTWRITEBYTECODE: '1'
PYTHONPYCACHEPREFIX: /tmp/inferencex-pycache

Expand All @@ -116,7 +133,7 @@ jobs:
benchmark:
runs-on: ${{ inputs.runner }}
timeout-minutes: 480
name: "${{ inputs.exp-name }} ${{ inputs.precision }} ${{ inputs.runner }} ${{ inputs.framework }} | P(tp${{ inputs.prefill-tp }}/ep${{ inputs.prefill-ep }}/dp${{ inputs.prefill-dp-attn }}/nw${{ inputs.prefill-num-worker }}) D(tp${{ inputs.decode-tp }}/ep${{ inputs.decode-ep }}/dp${{ inputs.decode-dp-attn }}/nw${{ inputs.decode-num-worker }}) | disagg-${{ inputs.disagg }} spec-${{ inputs.spec-decoding }} conc-${{ join(fromJson(inputs.conc-list), 'x') }}"
name: "${{ inputs.exp-name }} ${{ inputs.precision }} ${{ inputs.runner }} ${{ inputs.framework }} | P(tp${{ inputs.prefill-tp }}/ep${{ inputs.prefill-ep }}/dp${{ inputs.prefill-dp-attn }}/nw${{ inputs.prefill-num-worker }}) D(tp${{ inputs.decode-tp }}/ep${{ inputs.decode-ep }}/dp${{ inputs.decode-dp-attn }}/nw${{ inputs.decode-num-worker }}) | disagg-${{ inputs.disagg }} spec-${{ inputs.spec-decoding }} conc-${{ join(fromJson(inputs.conc-list), 'x') }}${{ inputs.eval-only && ' | eval-only' || (inputs.run-eval && ' | eval' || '') }}"

steps:
- name: Slurm cleanup (pre-run)
Expand Down Expand Up @@ -146,9 +163,17 @@ jobs:
ref: ${{ inputs.ref || github.ref }}
clean: false

# Checkout runs with clean: false, so eval files left in the workspace by an
# earlier job would otherwise be picked up by this run's upload step.
- name: Cleanup stale eval outputs (pre-run)
  if: ${{ inputs.run-eval || inputs.eval-only }}
  run: |
    # Best-effort: a failed delete must not fail the job.
    rm -f meta_env.json results*.json sample*.jsonl || true


- name: Launch multi-node job script
env:
RUNNER_NAME: ${{ runner.name }}
RUNNER_TYPE: ${{ inputs.runner }}
# Hash uniquely on {EXP_NAME}_{PRECISION}_{FRAMEWORK}_prefill-tp{}-ep{}-dp{}-nw{}_decode-tp{}-ep{}-dp{}-nw{}_disagg-{}_spec-{}_conc{}_{runner}
RESULT_FILENAME: ${{ env.EXP_NAME }}_${{ env.PRECISION }}_${{ env.FRAMEWORK }}_prefill-tp${{ env.PREFILL_TP }}-ep${{ env.PREFILL_EP }}-dp${{ env.PREFILL_DP_ATTN }}-nw${{ env.PREFILL_NUM_WORKERS }}_decode-tp${{ env.DECODE_TP }}-ep${{ env.DECODE_EP }}-dp${{ env.DECODE_DP_ATTN }}-nw${{ env.DECODE_NUM_WORKERS }}_disagg-${{ env.DISAGG }}_spec-${{ env.SPEC_DECODING }}_conc${{ join(fromJson(inputs.conc-list), 'x') }}_${{ runner.name }}
run: |
Expand All @@ -159,16 +184,26 @@ jobs:
export ${{ join(fromJson(inputs.prefill-additional-settings), ' ') }} ${{ join(fromJson(inputs.decode-additional-settings), ' ') }}
export IS_MULTINODE=true
bash ./runners/launch_${RUNNER_NAME%%_*}.sh
# Decide whether the launch produced what this job mode is expected to produce.
# EVAL_ONLY comes from the workflow-level env block (mirrors inputs.eval-only),
# which keeps template expressions out of the script body.
if [ "${EVAL_ONLY}" = "true" ]; then
  echo "Eval-only mode: skipping benchmark result file check"
  # Later steps build artifact names from env.RESULT_FILENAME; persist it here
  # too so eval artifacts from different matrix entries get distinct names.
  echo "RESULT_FILENAME=${RESULT_FILENAME}" >> "$GITHUB_ENV"
  # Verify eval produced results
  if ! ls results*.json 1>/dev/null 2>&1; then
    echo "Eval-only run failed: no results*.json files found." >&2
    exit 1
  fi
else
  # Check if at least one result file was created
  if ls "${RESULT_FILENAME}"_*.json 1>/dev/null 2>&1; then
    echo "RESULT_FILENAME=${RESULT_FILENAME}" >> "$GITHUB_ENV"
    echo "Found result files: $(ls "${RESULT_FILENAME}"_*.json)"
  else
    echo "Run failed: No benchmark result files found for ${RESULT_FILENAME}_*.json" >&2
    exit 1
  fi
fi

- name: Process results
if: ${{ !inputs.eval-only }}
env:
RUNNER_TYPE: ${{ inputs.runner }}
run: |
Expand All @@ -189,11 +224,34 @@ jobs:
done

# Publishes the agg_*-prefixed benchmark JSON files as the bmk_* artifact.
# Skipped for eval-only runs, which bypass the benchmark result-file check.
- name: Upload results
if: ${{ !inputs.eval-only }}
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
with:
name: bmk_${{ env.RESULT_FILENAME }}
path: agg_${{ env.RESULT_FILENAME }}_*.json

# Publishes eval outputs even when an earlier step failed (always()), so
# partial results remain available for debugging.
# NOTE(review): env.RESULT_FILENAME is only populated if an earlier step wrote
# it to GITHUB_ENV — confirm that happens for eval-only runs, otherwise the
# artifact name collapses to "eval_<exp-name>_" and matrix entries may collide.
- name: Upload eval results (if any)
  if: ${{ always() && (inputs.run-eval || inputs.eval-only) }}
  # Same pinned release as the other upload steps in this job.
  uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
  with:
    name: eval_${{ env.EXP_NAME }}_${{ env.RESULT_FILENAME }}
    path: |
      meta_env.json
      results*.json
      sample*.jsonl
    # Missing files are not an error for this step.
    if-no-files-found: ignore


# Runs the score validation script for eval-only jobs. It is placed after the
# eval upload step (which uses always()), so artifacts are uploaded before a
# validation failure can fail the job.
# NOTE(review): validation criteria live in validate_scores.py — not visible here.
- name: Verify eval scores
if: ${{ inputs.eval-only }}
run: python3 utils/evals/validate_scores.py

# Removes this run's eval files after upload, regardless of job outcome
# (always()), so they are not left in the workspace.
- name: Cleanup eval outputs (post-upload)
  if: ${{ always() && (inputs.run-eval || inputs.eval-only) }}
  run: |
    # Best-effort: a failed delete must not change the job result.
    rm -f meta_env.json results*.json sample*.jsonl || true


- name: Upload logs
if: always()
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
Expand Down
7 changes: 7 additions & 0 deletions .github/workflows/benchmark-tmpl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,13 @@ jobs:
ref: ${{ inputs.ref || github.ref }}
clean: false

# Checkout runs with clean: false, so eval files left in the workspace by an
# earlier job would otherwise be mixed into this run's outputs.
- name: Cleanup stale eval outputs (pre-run)
  if: ${{ inputs.run-eval || inputs.eval-only }}
  run: |
    # Best-effort: a failed delete must not fail the job.
    rm -f meta_env.json results*.json sample*.jsonl || true

- name: Launch job script
env:
RUNNER_NAME: ${{ runner.name }}
Expand Down
53 changes: 49 additions & 4 deletions .github/workflows/e2e-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
single-node-config: ${{ steps.get-jobs.outputs.single-node-config }}
multi-node-config: ${{ steps.get-jobs.outputs.multi-node-config }}
eval-config: ${{ steps.get-jobs.outputs.eval-config }}
multi-node-eval-config: ${{ steps.get-jobs.outputs.multi-node-eval-config }}
steps:
- name: Checkout code (ref)
if: ${{ inputs.ref && inputs.ref != '' }}
Expand All @@ -55,11 +56,13 @@
CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix_logic/generate_sweep_configs.py \
${{ inputs.generate-cli-command || github.event.inputs.generate-cli-command }})
SINGLE=$(echo "$CONFIG_JSON" | python3 -c "import sys,json; d=json.load(sys.stdin); print(json.dumps([x for x in d if 'prefill' not in x and not x.get('run-eval', False)]))")
MULTI=$(echo "$CONFIG_JSON" | python3 -c "import sys,json; d=json.load(sys.stdin); print(json.dumps([x for x in d if 'prefill' in x]))")
MULTI=$(echo "$CONFIG_JSON" | python3 -c "import sys,json; d=json.load(sys.stdin); print(json.dumps([x for x in d if 'prefill' in x and not x.get('run-eval', False)]))")
EVALS=$(echo "$CONFIG_JSON" | python3 -c "import sys,json; d=json.load(sys.stdin); print(json.dumps([x for x in d if 'prefill' not in x and x.get('run-eval', False)]))")
MULTI_EVAL=$(echo "$CONFIG_JSON" | python3 -c "import sys,json; d=json.load(sys.stdin); print(json.dumps([x for x in d if 'prefill' in x and x.get('run-eval', False)]))")
echo "single-node-config=$SINGLE" >> $GITHUB_OUTPUT
echo "multi-node-config=$MULTI" >> $GITHUB_OUTPUT
echo "eval-config=$EVALS" >> $GITHUB_OUTPUT
echo "multi-node-eval-config=$MULTI_EVAL" >> $GITHUB_OUTPUT

test-sweep-multi-node:
needs: get-jobs
Expand Down Expand Up @@ -97,9 +100,51 @@
decode-ep: ${{ matrix.config.decode.ep }}
decode-dp-attn: ${{ matrix.config.decode.dp-attn }}
decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }}
run-eval: false
ref: ${{ inputs.ref }}

# Eval-only counterpart of the multi-node sweep: fans out one job per entry of
# the multi-node-eval-config matrix through the shared multi-node template.
test-sweep-multi-node-evals:
  name: multi-node eval /
  needs: get-jobs
  # Nothing to do when get-jobs emitted an empty multi-node eval list.
  if: ${{ needs.get-jobs.outputs.multi-node-eval-config != '[]' }}
  uses: ./.github/workflows/benchmark-multinode-tmpl.yml
  secrets: inherit
  strategy:
    # Let the remaining matrix entries finish when one fails.
    fail-fast: false
    matrix:
      config: ${{ fromJson(needs.get-jobs.outputs.multi-node-eval-config) }}
  with:
    # Identity of the experiment and the stack under test.
    exp-name: ${{ matrix.config.exp-name }}
    runner: ${{ matrix.config.runner }}
    image: ${{ matrix.config.image }}
    model: ${{ matrix.config.model }}
    model-prefix: ${{ matrix.config.model-prefix }}
    framework: ${{ matrix.config.framework }}
    precision: ${{ matrix.config.precision }}

    # Workload shape.
    isl: ${{ matrix.config.isl }}
    osl: ${{ matrix.config.osl }}
    max-model-len: ${{ matrix.config.max-model-len }}
    conc-list: ${{ toJson(matrix.config.conc) }}
    spec-decoding: ${{ matrix.config.spec-decoding }}
    disagg: ${{ matrix.config.disagg }}

    # Prefill pool topology.
    prefill-num-worker: ${{ matrix.config.prefill.num-worker }}
    prefill-tp: ${{ matrix.config.prefill.tp }}
    prefill-ep: ${{ matrix.config.prefill.ep }}
    prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }}
    prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }}

    # Decode pool topology.
    decode-num-worker: ${{ matrix.config.decode.num-worker }}
    decode-tp: ${{ matrix.config.decode.tp }}
    decode-ep: ${{ matrix.config.decode.ep }}
    decode-dp-attn: ${{ matrix.config.decode.dp-attn }}
    decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }}

    # Eval switches: evals only, no throughput benchmark.
    run-eval: true
    eval-only: true
    eval-conc: ${{ matrix.config.eval-conc }}
    ref: ${{ inputs.ref }}


test-sweep-single-node:

Check warning

Code scanning / CodeQL

Workflow does not contain permissions Medium

Actions job or workflow does not limit the permissions of the GITHUB_TOKEN. Consider setting an explicit permissions block, using the following as a minimal starting point: {}
needs: get-jobs
if: ${{ needs.get-jobs.outputs.single-node-config != '[]' }}
uses: ./.github/workflows/benchmark-tmpl.yml
Expand Down Expand Up @@ -162,19 +207,19 @@

# Aggregates the "bmk" benchmark artifacts. Runs even if a sweep job failed
# (always()), but is skipped when both benchmark sweeps were skipped, since
# there is then nothing to collect.
collect-results:
  needs: [test-sweep-multi-node, test-sweep-single-node]
  if: ${{ always() && (needs.test-sweep-multi-node.result != 'skipped' || needs.test-sweep-single-node.result != 'skipped') }}
  uses: ./.github/workflows/collect-results.yml
  secrets: inherit
  with:
    result-prefix: "bmk"


# Aggregates eval artifacts from both the single-node and multi-node eval
# sweeps. Runs even if an eval job failed (always()), and is skipped only when
# both eval sweeps were skipped.
collect-evals:
  needs: [test-sweep-evals, test-sweep-multi-node-evals]
  if: ${{ always() && (needs.test-sweep-evals.result != 'skipped' || needs.test-sweep-multi-node-evals.result != 'skipped') }}
  uses: ./.github/workflows/collect-evals.yml
  secrets: inherit


calc-success-rate:

Check warning

Code scanning / CodeQL

Workflow does not contain permissions Medium

Actions job or workflow does not limit the permissions of the GITHUB_TOKEN. Consider setting an explicit permissions block, using the following as a minimal starting point: {}
needs: [collect-results, collect-evals]
if: ${{ always() }}
runs-on: ubuntu-latest
Expand Down
43 changes: 41 additions & 2 deletions .github/workflows/run-sweep.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@
decode-ep: ${{ matrix.config.decode.ep }}
decode-dp-attn: ${{ matrix.config.decode.dp-attn }}
decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }}
run-eval: false

sweep-multi-node-8k1k:
needs: setup
Expand Down Expand Up @@ -189,7 +190,45 @@
run-eval: true
eval-only: true

# Eval-only multi-node sweep: fans out one job per entry of the setup job's
# multinode_evals list through the shared multi-node template.
sweep-multi-node-evals:
  name: multi-node eval /
  needs: setup
  # Skip when the list is empty or the key is absent from the search space.
  if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).multinode_evals) != '[]' && toJson(fromJson(needs.setup.outputs.search-space-config).multinode_evals) != 'null' }}
  uses: ./.github/workflows/benchmark-multinode-tmpl.yml
  secrets: inherit
  strategy:
    # Let the remaining matrix entries finish when one fails.
    fail-fast: false
    matrix:
      config: ${{ fromJson(needs.setup.outputs.search-space-config).multinode_evals }}
  with:
    # Identity of the experiment and the stack under test.
    exp-name: ${{ matrix.config.exp-name }}
    runner: ${{ matrix.config.runner }}
    image: ${{ matrix.config.image }}
    model: ${{ matrix.config.model }}
    model-prefix: ${{ matrix.config.model-prefix }}
    framework: ${{ matrix.config.framework }}
    precision: ${{ matrix.config.precision }}

    # Workload shape.
    isl: ${{ matrix.config.isl }}
    osl: ${{ matrix.config.osl }}
    max-model-len: ${{ matrix.config.max-model-len }}
    conc-list: ${{ toJson(matrix.config.conc) }}
    spec-decoding: ${{ matrix.config.spec-decoding }}
    disagg: ${{ matrix.config.disagg }}

    # Prefill pool topology.
    prefill-num-worker: ${{ matrix.config.prefill.num-worker }}
    prefill-tp: ${{ matrix.config.prefill.tp }}
    prefill-ep: ${{ matrix.config.prefill.ep }}
    prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }}
    prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }}

    # Decode pool topology.
    decode-num-worker: ${{ matrix.config.decode.num-worker }}
    decode-tp: ${{ matrix.config.decode.tp }}
    decode-ep: ${{ matrix.config.decode.ep }}
    decode-dp-attn: ${{ matrix.config.decode.dp-attn }}
    decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }}

    # Eval switches: evals only, no throughput benchmark.
    run-eval: true
    eval-only: true
    eval-conc: ${{ matrix.config.eval-conc }}


collect-results:

Check warning

Code scanning / CodeQL

Workflow does not contain permissions Medium

Actions job or workflow does not limit the permissions of the GITHUB_TOKEN. Consider setting an explicit permissions block, using the following as a minimal starting point: {}
needs:
[
sweep-single-node-1k1k,
Expand All @@ -205,12 +244,12 @@
result-prefix: "bmk"

# Aggregates eval artifacts from both the single-node and multi-node eval
# sweeps. Runs even if an eval job failed (always()), provided setup ran and at
# least one of the two eval sweeps was not skipped.
collect-evals:
  needs: [sweep-evals, sweep-multi-node-evals, setup]
  if: ${{ always() && needs.setup.result != 'skipped' && (needs.sweep-evals.result != 'skipped' || needs.sweep-multi-node-evals.result != 'skipped') }}
  uses: ./.github/workflows/collect-evals.yml
  secrets: inherit


upload-changelog-metadata:

Check warning

Code scanning / CodeQL

Workflow does not contain permissions Medium

Actions job or workflow does not limit the permissions of the GITHUB_TOKEN. Consider setting an explicit permissions block, using the following as a minimal starting point: {}
needs: [setup, collect-results]
if: ${{ always() && needs.setup.result != 'skipped' }}
runs-on: ubuntu-latest
Expand Down
Loading
Loading