dexpace · OmarAlJarrah · Jun 16, 2026 · Jun 15, 2026 · Jun 16, 2026 · Jun 16, 2026
diff --git a/docs/pipelines.md b/docs/pipelines.md
@@ -389,13 +389,21 @@ and carries:
 | `maxDelay`          | `8s`                      | Cap on the scaled delay                                       |
 | `maxAttempts`       | `3`                       | Total attempts including the first send; `1` disables retries |
 | `jitter`            | `0.2`                     | Symmetric jitter fraction in `[0.0, 1.0]`                     |
-| `retryableStatuses` | `{429, 500, 502, 503, 504}` | Status codes that trigger a retry on an `HttpException`     |
+| `retryableStatuses` | `{408, 429, 500, 502, 503, 504}` | Status codes that trigger a retry on an `HttpException` |
 | `retryableMethods`  | `{GET, HEAD, OPTIONS, PUT, DELETE}` | Methods retryable by RFC 9110; others need a replayable body |
 | `scheduler`         | `null`                    | Optional caller scheduler; `null` uses a daemon scheduler     |
 
-`408` (Request Timeout) is intentionally excluded from the default `retryableStatuses` — a
-server-side 408 usually means the client was slow to send and is unlikely to improve on retry.
-Callers that disagree can opt in via the builder.
+These are the SDK's canonical retry defaults: the stage-based `DefaultRetryStep` (configured
+through `HttpRetryOptions`) uses the same base delay (`200ms`), max delay (`8s`), multiplier
+(`2.0`), jitter (`0.2`), retryable-status policy, and total send budget (3 attempts). Both stacks
+compute their exponential schedule through the shared `BackoffCalculator`, so the two cannot
+drift apart; the only intentional difference is that the stage-based step has no `totalTimeout`
+deadline. `HttpRetryOptions` counts *retries* (`maxRetries`, default `2`) while `RetrySettings`
+counts *total attempts* (`maxAttempts`, default `3`) — both default to the same 3 sends.
+
+`408` (Request Timeout) **is** retryable by default, matching
+`RetryUtils.isRetryable`/`HttpException.retryable` and the stage-based step. Callers wanting a
+stricter posture can pass a tighter `retryableStatuses` set to the builder.
 
 ---
 

diff --git a/sdk-core/api/sdk-core.api b/sdk-core/api/sdk-core.api
@@ -793,7 +793,6 @@ public class org/dexpace/sdk/core/http/pipeline/steps/DefaultRedirectStep : org/
 public class org/dexpace/sdk/core/http/pipeline/steps/DefaultRetryStep : org/dexpace/sdk/core/http/pipeline/steps/RetryStep {
 	public static final field Companion Lorg/dexpace/sdk/core/http/pipeline/steps/DefaultRetryStep$Companion;
 	public static final field DEFAULT_MAX_RETRIES I
-	public static final field MAX_SHIFT_TRY_COUNT I
 	public fun <init> ()V
 	public fun <init> (Lorg/dexpace/sdk/core/http/pipeline/steps/HttpRetryOptions;)V
 	public fun <init> (Lorg/dexpace/sdk/core/http/pipeline/steps/HttpRetryOptions;Lorg/dexpace/sdk/core/util/Clock;)V
@@ -2163,7 +2162,10 @@ public final class org/dexpace/sdk/core/pipeline/step/retry/RetryAfterParser {
 
 public final class org/dexpace/sdk/core/pipeline/step/retry/RetrySettings {
 	public static final field Companion Lorg/dexpace/sdk/core/pipeline/step/retry/RetrySettings$Companion;
+	public static final field DEFAULT_DELAY_MULTIPLIER D
 	public static final field DEFAULT_INITIAL_DELAY Ljava/time/Duration;
+	public static final field DEFAULT_JITTER D
+	public static final field DEFAULT_MAX_ATTEMPTS I
 	public static final field DEFAULT_MAX_DELAY Ljava/time/Duration;
 	public static final field DEFAULT_RETRYABLE_METHODS Ljava/util/Set;
 	public static final field DEFAULT_RETRYABLE_STATUSES Ljava/util/Set;

diff --git a/sdk-core/src/main/kotlin/org/dexpace/sdk/core/http/pipeline/steps/DefaultRetryStep.kt b/sdk-core/src/main/kotlin/org/dexpace/sdk/core/http/pipeline/steps/DefaultRetryStep.kt
@@ -13,12 +13,13 @@ import org.dexpace.sdk.core.http.request.Method
 import org.dexpace.sdk.core.http.request.Request
 import org.dexpace.sdk.core.http.response.Response
 import org.dexpace.sdk.core.instrumentation.ClientLogger
+import org.dexpace.sdk.core.pipeline.step.retry.BackoffCalculator
 import org.dexpace.sdk.core.pipeline.step.retry.RetryAfterParser
+import org.dexpace.sdk.core.pipeline.step.retry.RetrySettings
 import org.dexpace.sdk.core.util.Clock
 import java.io.IOException
 import java.io.InterruptedIOException
 import java.time.Duration
-import java.util.concurrent.ThreadLocalRandom
 
 /**
  * Default [RetryStep]. Drives an iterative retry loop with classified failure detection,
@@ -40,9 +41,8 @@ import java.util.concurrent.ThreadLocalRandom
  *  4. Sleeps via [Clock.sleep]; an interrupt during sleep throws [InterruptedIOException]
  *     with the original [InterruptedException] and any accumulated prior failures attached
  *     as suppressed — retries are NOT resumed after an interrupt.
- *  5. Caps `tryCount` at [MAX_SHIFT_TRY_COUNT] before computing `1L shl tryCount` so the
- *     left-shift can never overflow (the resulting delay is clamped to [HttpRetryOptions.maxDelay]
- *     anyway, so the cap is invisible to callers).
+ *  5. Computes the exponential delay through the shared [BackoffCalculator], which saturates
+ *     rather than overflows on extreme attempt counts and clamps to [HttpRetryOptions.maxDelay].
  *
  * ## Body replayability
  *
@@ -69,8 +69,12 @@ import java.util.concurrent.ThreadLocalRandom
  *     parsed from the response (response path only). A negative or unparseable value
  *     falls through; a value of zero produces an immediate retry.
  *  3. [HttpRetryOptions.fixedDelay] — if set, every retry waits exactly this duration.
- *  4. Exponential backoff: `baseDelay * (1L shl tryCount)` clamped to `maxDelay`, with
- *     ±5% random jitter via [ThreadLocalRandom].
+ *  4. Exponential backoff computed by the shared [BackoffCalculator]:
+ *     `baseDelay * 2.0^tryCount` clamped to `maxDelay`, with symmetric ±10% jitter
+ *     ([RetrySettings.DEFAULT_JITTER]). This is the same calculator the recovery-aware
+ *     `pipeline.step.retry.RetryStep` uses, so both stacks share one backoff formula and one
+ *     set of defaults. The deadline-shrinking that the calculator also offers is disabled here
+ *     (this stage-based step carries no total-timeout budget).
  *
  * ## Failure handling
  *
@@ -146,6 +150,33 @@ public open class DefaultRetryStep
          */
         private val options: HttpRetryOptions = clampOptions(options)
 
+        /**
+         * The [options]' exponential parameters expressed as a [RetrySettings] view so the shared
+         * [BackoffCalculator] can compute this stack's schedule. Built once per step instance:
+         *  - `initialDelay` / `maxDelay` come from the options.
+         *  - `delayMultiplier` (2.0) and `jitter` (0.2) are the canonical shared constants — the
+         *    options object does not expose its own multiplier/jitter, so the SDK defaults apply.
+         *    If [HttpRetryOptions] ever gains configurable multiplier/jitter knobs, this view must
+         *    read them from the options instead of the constants, or the new knobs are silently
+         *    ignored on this stack.
+         *  - `totalTimeout = ZERO` disables the deadline cap: the stage-based step has no budget.
+         * The `fixedDelay` path never consults this view; it short-circuits in [backoffOrFixed].
+         *
+         * Building this view also validates the delay magnitudes eagerly: [RetrySettings.builder]
+         * rejects a negative `baseDelay`/`maxDelay` or one exceeding the calculator's
+         * ~292-year nanosecond ceiling. [HttpRetryOptions] performs no such range check, so a
+         * pathological delay surfaces as an [IllegalArgumentException] here, at step construction,
+         * rather than later at delay-computation time.
+         */
+        private val backoffSettings: RetrySettings =
+            RetrySettings.builder()
+                .initialDelay(this.options.baseDelay)
+                .maxDelay(this.options.maxDelay)
+                .delayMultiplier(RetrySettings.DEFAULT_DELAY_MULTIPLIER)
+                .jitter(RetrySettings.DEFAULT_JITTER)
+                .totalTimeout(Duration.ZERO)
+                .build()
+
         /**
          * Sends [request] through the downstream pipeline with automatic retry on retryable failures.
          *
@@ -437,51 +468,16 @@ public open class DefaultRetryStep
             }
 
         /**
-         * Returns [HttpRetryOptions.fixedDelay] if set, otherwise the exponential-backoff
-         * delay for [tryCount]. The shift count is capped at [MAX_SHIFT_TRY_COUNT] so the
-         * `1L shl tryCount` term never overflows; the result is always clamped to [HttpRetryOptions.maxDelay]
-         * anyway, so the cap is invisible in practice.
-         */
-        private fun backoffOrFixed(tryCount: Int): Duration = options.fixedDelay ?: exponentialBackoff(tryCount)
-
-        /**
-         * `baseDelay * (1L shl tryCount)` clamped to `maxDelay`, plus a ±5% jitter sampled
-         * from [ThreadLocalRandom]. Pure function of [tryCount] and the configured options.
-         */
-        private fun exponentialBackoff(tryCount: Int): Duration {
-            val baseNanos = options.baseDelay.toNanos()
-            if (baseNanos == 0L) return Duration.ZERO
-            val maxNanos = options.maxDelay.toNanos()
-            val safeShift = tryCount.coerceAtMost(MAX_SHIFT_TRY_COUNT)
-            // 1L shl 30 ~= 1e9 — multiplying by 800ms (8e8 ns) overflows. Cap on the long
-            // multiply itself: if `baseNanos * (1L shl safeShift)` would overflow, clamp.
-            val multiplier = 1L shl safeShift
-            val scaled =
-                if (baseNanos > 0 && multiplier > Long.MAX_VALUE / baseNanos) {
-                    Long.MAX_VALUE
-                } else {
-                    baseNanos * multiplier
-                }
-            val clamped = scaled.coerceAtMost(maxNanos)
-            val jittered = applyJitter(clamped)
-            // Guarantee a non-negative result — jitter could push us under zero if the caller
-            // configured pathological options (e.g. baseDelay equal to negative epsilon).
-            return Duration.ofNanos(jittered.coerceAtLeast(0L))
-        }
-
-        /**
-         * Applies a ±5% jitter to [nanos]. Sample is drawn from [ThreadLocalRandom] which is
-         * per-thread, so there is no cross-thread contention on the retry hot path.
+         * Returns [HttpRetryOptions.fixedDelay] if set, otherwise the exponential-backoff delay
+         * for [tryCount]. The backoff is computed by the shared [BackoffCalculator] from
+         * [backoffSettings] so this stack and the recovery-aware `RetryStep` share one formula.
+         *
+         * [tryCount] is 0-indexed here (`0` = the delay before the first retry), whereas
+         * [BackoffCalculator.computeDelay] is 1-indexed (`1` = first retry); the `+ 1` bridges
+         * the two so both produce `baseDelay`, `2·baseDelay`, `4·baseDelay`, … capped at `maxDelay`.
          */
-        private fun applyJitter(nanos: Long): Long {
-            if (nanos == 0L) return 0L
-            // 5% of nanos, used as the magnitude bound on the random sample.
-            val jitterMagnitude = nanos / JITTER_DIVISOR
-            if (jitterMagnitude == 0L) return nanos
-            // ThreadLocalRandom.nextLong(origin, bound) is inclusive of origin, exclusive of bound.
-            val offset = ThreadLocalRandom.current().nextLong(-jitterMagnitude, jitterMagnitude + 1L)
-            return nanos + offset
-        }
+        private fun backoffOrFixed(tryCount: Int): Duration =
+            options.fixedDelay ?: BackoffCalculator.computeDelay(tryCount + 1, backoffSettings)
 
         // --------------- Retry-After parsing ---------------
 
@@ -581,22 +577,14 @@ public open class DefaultRetryStep
 
         public companion object {
             /**
-             * Default [HttpRetryOptions.maxRetries] applied when the caller passes a negative
-             * value. Matches Azure Core's `RetryOptions` default.
+             * Default retry count applied when the caller passes a negative
+             * [HttpRetryOptions.maxRetries], and the value baked into the no-arg
+             * [HttpRetryOptions] default. `2` retries on top of the initial send is the SDK's
+             * canonical budget — `initial + DEFAULT_MAX_RETRIES == 3`, matching
+             * [RetrySettings.DEFAULT_MAX_ATTEMPTS] so both retry stacks default to the same
+             * number of total sends.
              */
-            public const val DEFAULT_MAX_RETRIES: Int = 3
-
-            /**
-             * Upper bound on `tryCount` used for the `1L shl tryCount` term in
-             * [DefaultRetryStep.exponentialBackoff]. `1L shl 30` ~= 1.07e9 — the scaled delay is
-             * always clamped to [HttpRetryOptions.maxDelay] long before this bound is hit, so the
-             * cap is a paranoid guard against integer overflow rather than a behavior knob.
-             */
-            public const val MAX_SHIFT_TRY_COUNT: Int = 30
-
-            // Jitter is ±5% of the computed delay; expressed as the divisor (nanos / 20) to
-            // avoid an extra multiplication on the hot path. See [applyJitter].
-            private const val JITTER_DIVISOR = 20L
+            public const val DEFAULT_MAX_RETRIES: Int = 2
 
             // Nanoseconds in one millisecond — used to convert monotonic-clock deltas to ms
             // for retry log events.

diff --git a/sdk-core/src/main/kotlin/org/dexpace/sdk/core/http/pipeline/steps/HttpRetryOptions.kt b/sdk-core/src/main/kotlin/org/dexpace/sdk/core/http/pipeline/steps/HttpRetryOptions.kt
@@ -8,6 +8,7 @@
 package org.dexpace.sdk.core.http.pipeline.steps
 
 import org.dexpace.sdk.core.http.common.HttpHeaderName
+import org.dexpace.sdk.core.pipeline.step.retry.RetrySettings
 import org.dexpace.sdk.core.util.RetryUtils
 import java.time.Duration
 import java.util.Collections
@@ -40,12 +41,15 @@ public fun interface HttpRetryDelayProvider {
 }
 
 /**
- * Configuration for [DefaultRetryStep]. Defaults mirror Azure Core's retry policy:
- *  - [maxRetries] = 3
- *  - [baseDelay] = 800ms (exponentially scaled per attempt)
- *  - [maxDelay] = 8s (cap on the scaled delay)
+ * Configuration for [DefaultRetryStep]. The numeric defaults are the SDK's canonical retry
+ * defaults, shared with the recovery-aware [RetrySettings] so both retry stacks compute the
+ * same backoff schedule (via [org.dexpace.sdk.core.pipeline.step.retry.BackoffCalculator]):
+ *  - [maxRetries] = 2 (initial send + 2 retries = 3 total attempts, matching
+ *    [RetrySettings.DEFAULT_MAX_ATTEMPTS]).
+ *  - [baseDelay] = 200ms (= [RetrySettings.DEFAULT_INITIAL_DELAY]; exponentially scaled per attempt).
+ *  - [maxDelay] = 8s (= [RetrySettings.DEFAULT_MAX_DELAY]; cap on the scaled delay).
  *  - [fixedDelay] = null (exponential backoff is used; when non-null it overrides the
- *    backoff entirely and every retry waits exactly [fixedDelay])
+ *    backoff entirely and every retry waits exactly [fixedDelay]).
  *  - [retryAfterHeaders] = `Retry-After`, `retry-after-ms`, `x-ms-retry-after-ms` —
  *    parsed in declared order; the first present wins. Drop the Microsoft-specific
  *    variants by passing a tighter list for stricter posture.
@@ -54,15 +58,20 @@ public fun interface HttpRetryDelayProvider {
  *    anywhere in the cause chain, per [RetryUtils.isRetryable].
  *  - [delayFromCondition] = null delay (falls through to `Retry-After` parsing, then backoff).
  *
+ * The exponential schedule, multiplier (2.0), and symmetric jitter (0.2) are sourced from the
+ * shared [RetrySettings] constants and applied by `BackoffCalculator`, so this stack and the
+ * recovery-aware stack cannot drift apart. The one intentional difference is the deadline: this
+ * stage-based step has no total-timeout budget, so it never shrinks a delay against one.
+ *
  * The companion [HttpRetryOptions.fixed] factory builds an options instance whose delay
  * never grows — useful for test injection or high-throughput retry against flaky endpoints.
  */
 public class HttpRetryOptions
     @JvmOverloads
     constructor(
-        public val maxRetries: Int = 3,
-        public val baseDelay: Duration = Duration.ofMillis(DEFAULT_BASE_DELAY_MS),
-        public val maxDelay: Duration = Duration.ofSeconds(DEFAULT_MAX_DELAY_SECONDS),
+        public val maxRetries: Int = DEFAULT_MAX_RETRIES,
+        public val baseDelay: Duration = RetrySettings.DEFAULT_INITIAL_DELAY,
+        public val maxDelay: Duration = RetrySettings.DEFAULT_MAX_DELAY,
         public val fixedDelay: Duration? = null,
         public val retryAfterHeaders: List<HttpHeaderName> = DEFAULT_RETRY_AFTER_HEADERS,
         public val shouldRetryCondition: HttpRetryConditionPredicate =
@@ -72,10 +81,9 @@ public class HttpRetryOptions
         public val delayFromCondition: HttpRetryDelayProvider = HttpRetryDelayProvider { null },
     ) {
         public companion object {
-            // Default exponential-backoff parameters tuned to favour fast first-retry while
-            // bounding cumulative latency. Aligned with Azure Core's RetryOptions defaults.
-            private const val DEFAULT_BASE_DELAY_MS = 800L
-            private const val DEFAULT_MAX_DELAY_SECONDS = 8L
+            // The default retry count is the canonical SDK budget, kept in one place on
+            // DefaultRetryStep (initial send + DEFAULT_MAX_RETRIES == RetrySettings.DEFAULT_MAX_ATTEMPTS).
+            private const val DEFAULT_MAX_RETRIES = DefaultRetryStep.DEFAULT_MAX_RETRIES
 
             /**
              * The three `Retry-After` header forms parsed by [DefaultRetryStep]. Order matters —