From bc90f18026609b17afcb82104cb654c68541660e Mon Sep 17 00:00:00 2001
From: seungrokj <seungrok.jung@amd.com>
Date: Tue, 31 Mar 2026 07:16:43 +0000
Subject: [PATCH 1/2] ATOM: run DeepSeek R1 FP8 MI355X MTP with 3 speculative tokens

Signed-off-by: seungrokj <seungrok.jung@amd.com>
---
 .github/configs/amd-master.yaml                    | 3 +--
 benchmarks/single_node/dsr1_fp8_mi355x_atom_mtp.sh | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml
index e84fc0da5..14751007c 100644
--- a/.github/configs/amd-master.yaml
+++ b/.github/configs/amd-master.yaml
@@ -485,12 +485,11 @@ dsr1-fp8-mi355x-atom:
     - { tp: 8, conc-start: 4, conc-end: 128 }
 
 dsr1-fp8-mi355x-atom-mtp:
-  image: rocm/atom:rocm7.2.0-ubuntu24.04-pytorch2.9-atom0.1.1
+  image: rocm/atom:rocm7.2.1-ubuntu24.04-pytorch2.9.1-atom0.1.2
   model: deepseek-ai/DeepSeek-R1-0528
   model-prefix: dsr1
   runner: mi355x
   precision: fp8
-  # WIP framwork (no customers yet)
   framework: atom
   multinode: false
   seq-len-configs:
diff --git a/benchmarks/single_node/dsr1_fp8_mi355x_atom_mtp.sh b/benchmarks/single_node/dsr1_fp8_mi355x_atom_mtp.sh
index 920efb6ff..69179cec0 100644
--- a/benchmarks/single_node/dsr1_fp8_mi355x_atom_mtp.sh
+++ b/benchmarks/single_node/dsr1_fp8_mi355x_atom_mtp.sh
@@ -53,6 +53,7 @@ python3 -m atom.entrypoints.openai_server \
     -tp $TP \
     --kv_cache_dtype fp8 $CALCULATED_MAX_MODEL_LEN $EP \
     --method mtp \
+    --num-speculative-tokens 3 \
     > $SERVER_LOG 2>&1 &
 
 SERVER_PID=$!
@@ -83,5 +84,3 @@ fi
 # Stop GPU monitoring
 stop_gpu_monitor
 set +x
-
-set -x

From 109c97460e00503518aa67079a536ae057f70bc2 Mon Sep 17 00:00:00 2001
From: seungrokj <seungrok.jung@amd.com>
Date: Tue, 31 Mar 2026 07:21:18 +0000
Subject: [PATCH 2/2] perf-changelog: add ATOM DeepSeek R1 MI355X FP8 MTP (3 tokens) entry

---
 perf-changelog.yaml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 967edc19c..dc7310c84 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -1220,3 +1220,10 @@
     - "Uses nvidia/GLM-5-NVFP4 model with modelopt_fp4 quantization"
     - "Image: lmsysorg/sglang:nightly-dev-cu13-20260328-a27651d5"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/973
+
+- config-keys:
+    - dsr1-fp8-mi355x-atom-mtp
+  description:
+    - "DeepSeek R1 MI355X FP8 ATOM-MTP config now runs MTP with 3 speculative tokens"
+    - "Image: rocm/atom:rocm7.2.1-ubuntu24.04-pytorch2.9.1-atom0.1.2"
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/984