From bc90f18026609b17afcb82104cb654c68541660e Mon Sep 17 00:00:00 2001 From: seungrokj Date: Tue, 31 Mar 2026 07:16:43 +0000 Subject: [PATCH 1/2] ATOM ds r1 fp8 mtp 3 tokens Signed-off-by: seungrokj --- .github/configs/amd-master.yaml | 3 +-- benchmarks/single_node/dsr1_fp8_mi355x_atom_mtp.sh | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index e84fc0da5..14751007c 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -485,12 +485,11 @@ dsr1-fp8-mi355x-atom: - { tp: 8, conc-start: 4, conc-end: 128 } dsr1-fp8-mi355x-atom-mtp: - image: rocm/atom:rocm7.2.0-ubuntu24.04-pytorch2.9-atom0.1.1 + image: rocm/atom:rocm7.2.1-ubuntu24.04-pytorch2.9.1-atom0.1.2 model: deepseek-ai/DeepSeek-R1-0528 model-prefix: dsr1 runner: mi355x precision: fp8 - # WIP framwork (no customers yet) framework: atom multinode: false seq-len-configs: diff --git a/benchmarks/single_node/dsr1_fp8_mi355x_atom_mtp.sh b/benchmarks/single_node/dsr1_fp8_mi355x_atom_mtp.sh index 920efb6ff..69179cec0 100644 --- a/benchmarks/single_node/dsr1_fp8_mi355x_atom_mtp.sh +++ b/benchmarks/single_node/dsr1_fp8_mi355x_atom_mtp.sh @@ -53,6 +53,7 @@ python3 -m atom.entrypoints.openai_server \ -tp $TP \ --kv_cache_dtype fp8 $CALCULATED_MAX_MODEL_LEN $EP \ --method mtp \ + --num-speculative-tokens 3 \ > $SERVER_LOG 2>&1 & SERVER_PID=$! @@ -83,5 +84,3 @@ fi # Stop GPU monitoring stop_gpu_monitor set +x - -set -x From 109c97460e00503518aa67079a536ae057f70bc2 Mon Sep 17 00:00:00 2001 From: seungrokj Date: Tue, 31 Mar 2026 07:21:18 +0000 Subject: [PATCH 2/2] ATOM supports DeepSeek R1 MI355X FP8 MTP 3 --- perf-changelog.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 967edc19c..dc7310c84 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1220,3 +1220,10 @@ - "Uses nvidia/GLM-5-NVFP4 model with modelopt_fp4 quantization" - "Image: lmsysorg/sglang:nightly-dev-cu13-20260328-a27651d5" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/973 + +- config-keys: + - dsr1-fp8-mi355x-atom-mtp + description: + - "DeepSeek R1 MI355X FP8 ATOM-MTP config to support MTP 3 tokens" + - "Image: rocm/atom:rocm7.2.1-ubuntu24.04-pytorch2.9.1-atom0.1.2" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/984