From 4c6bcee035f9db6a521a2e8f821d40bde20216d1 Mon Sep 17 00:00:00 2001 From: OmarAlJarrah Date: Mon, 15 Jun 2026 22:41:09 +0300 Subject: [PATCH 1/4] feat: add retry controls for fractional Retry-After, attempt headers, and server-driven retries Three additive, opt-in enhancements to the retry layer: - Parse fractional Retry-After values. RetryAfterParser now reads the Retry-After delta-seconds form with toDoubleOrNull, so values like `Retry-After: 1.5` are honoured to nanosecond resolution instead of being silently ignored. Integer seconds and the HTTP-date form are unchanged, and the negative / NaN / non-finite guards plus the 365-day ceiling still apply. The numeric form now shares that ceiling with the date and Unix-epoch forms, so a far-future delta clamps rather than flowing through unbounded. Both retry layers benefit, since they delegate parsing here. - Optional per-attempt retry-count request header. RetrySettings gains an opt-in attemptHeaderName (default null). When set, the recovery-aware RetryStep stamps that header with the 1-based attempt ordinal on a per-attempt copy of the request, so servers and proxies can observe the retry count. The header is applied via Request.newBuilder, leaving the immutable template and any caller-supplied idempotency key untouched. - Server-driven retry predicate. New ServerOverrideRetryPredicate implements the composable HttpRetryConditionPredicate seam and honours an X-Should-Retry-style response header (configurable name): a truthy value forces a retry, a falsy value suppresses one, and an absent or unrecognised value defers to a delegate predicate. It is not installed by default; the default retryable-status set is unchanged, so 409 stays out of it unless a server explicitly opts a response in. --- sdk-core/api/sdk-core.api | 19 ++- .../steps/ServerOverrideRetryPredicate.kt | 100 +++++++++++++ .../pipeline/step/retry/RetryAfterParser.kt | 31 +++- .../core/pipeline/step/retry/RetrySettings.kt | 22 +++ .../sdk/core/pipeline/step/retry/RetryStep.kt | 42 +++++- .../core/http/pipeline/steps/RetryStepTest.kt | 136 ++++++++++++++++++ .../step/retry/RetryAfterParserTest.kt | 53 ++++++- .../core/pipeline/step/retry/RetryStepTest.kt | 105 ++++++++++++++ 8 files changed, 493 insertions(+), 15 deletions(-) create mode 100644 sdk-core/src/main/kotlin/org/dexpace/sdk/core/http/pipeline/steps/ServerOverrideRetryPredicate.kt diff --git a/sdk-core/api/sdk-core.api b/sdk-core/api/sdk-core.api index de3a5d14..f136a561 100644 --- a/sdk-core/api/sdk-core.api +++ b/sdk-core/api/sdk-core.api @@ -945,6 +945,21 @@ public abstract class org/dexpace/sdk/core/http/pipeline/steps/RetryStep : org/d public final fun getStage ()Lorg/dexpace/sdk/core/http/pipeline/Stage; } +public final class org/dexpace/sdk/core/http/pipeline/steps/ServerOverrideRetryPredicate : org/dexpace/sdk/core/http/pipeline/steps/HttpRetryConditionPredicate { + public static final field Companion Lorg/dexpace/sdk/core/http/pipeline/steps/ServerOverrideRetryPredicate$Companion; + public static final field DEFAULT_HEADER_NAME Lorg/dexpace/sdk/core/http/common/HttpHeaderName; + public fun ()V + public fun (Lorg/dexpace/sdk/core/http/common/HttpHeaderName;)V + public fun (Lorg/dexpace/sdk/core/http/common/HttpHeaderName;Lorg/dexpace/sdk/core/http/pipeline/steps/HttpRetryConditionPredicate;)V + public synthetic fun (Lorg/dexpace/sdk/core/http/common/HttpHeaderName;Lorg/dexpace/sdk/core/http/pipeline/steps/HttpRetryConditionPredicate;ILkotlin/jvm/internal/DefaultConstructorMarker;)V + public final fun getDelegate ()Lorg/dexpace/sdk/core/http/pipeline/steps/HttpRetryConditionPredicate; + public final fun getHeaderName ()Lorg/dexpace/sdk/core/http/common/HttpHeaderName; + public fun shouldRetry (Lorg/dexpace/sdk/core/http/pipeline/steps/HttpRetryCondition;)Z +} + +public final class org/dexpace/sdk/core/http/pipeline/steps/ServerOverrideRetryPredicate$Companion { +} + public final class org/dexpace/sdk/core/http/pipeline/steps/SetDateStep : org/dexpace/sdk/core/http/pipeline/HttpStep { public fun ()V public fun (Lorg/dexpace/sdk/core/util/Clock;)V @@ -2112,9 +2127,10 @@ public final class org/dexpace/sdk/core/pipeline/step/retry/RetrySettings { public static final field DEFAULT_RETRYABLE_METHODS Ljava/util/Set; public static final field DEFAULT_RETRYABLE_STATUSES Ljava/util/Set; public static final field DEFAULT_TOTAL_TIMEOUT Ljava/time/Duration; - public synthetic fun (Ljava/time/Duration;Ljava/time/Duration;DLjava/time/Duration;IDLjava/util/Set;Ljava/util/Set;Ljava/util/concurrent/ScheduledExecutorService;Lkotlin/jvm/internal/DefaultConstructorMarker;)V + public synthetic fun (Ljava/time/Duration;Ljava/time/Duration;DLjava/time/Duration;IDLjava/util/Set;Ljava/util/Set;Ljava/util/concurrent/ScheduledExecutorService;Lorg/dexpace/sdk/core/http/common/HttpHeaderName;Lkotlin/jvm/internal/DefaultConstructorMarker;)V public static final fun builder ()Lorg/dexpace/sdk/core/pipeline/step/retry/RetrySettings$RetrySettingsBuilder; public static final fun defaults ()Lorg/dexpace/sdk/core/pipeline/step/retry/RetrySettings; + public final fun getAttemptHeaderName ()Lorg/dexpace/sdk/core/http/common/HttpHeaderName; public final fun getDelayMultiplier ()D public final fun getInitialDelay ()Ljava/time/Duration; public final fun getJitter ()D @@ -2135,6 +2151,7 @@ public final class org/dexpace/sdk/core/pipeline/step/retry/RetrySettings$Compan public final class org/dexpace/sdk/core/pipeline/step/retry/RetrySettings$RetrySettingsBuilder : org/dexpace/sdk/core/generics/Builder { public fun ()V public fun (Lorg/dexpace/sdk/core/pipeline/step/retry/RetrySettings;)V + public final fun attemptHeaderName (Lorg/dexpace/sdk/core/http/common/HttpHeaderName;)Lorg/dexpace/sdk/core/pipeline/step/retry/RetrySettings$RetrySettingsBuilder; public synthetic fun build ()Ljava/lang/Object; public fun build ()Lorg/dexpace/sdk/core/pipeline/step/retry/RetrySettings; public final fun delayMultiplier (D)Lorg/dexpace/sdk/core/pipeline/step/retry/RetrySettings$RetrySettingsBuilder; diff --git a/sdk-core/src/main/kotlin/org/dexpace/sdk/core/http/pipeline/steps/ServerOverrideRetryPredicate.kt b/sdk-core/src/main/kotlin/org/dexpace/sdk/core/http/pipeline/steps/ServerOverrideRetryPredicate.kt new file mode 100644 index 00000000..db58c994 --- /dev/null +++ b/sdk-core/src/main/kotlin/org/dexpace/sdk/core/http/pipeline/steps/ServerOverrideRetryPredicate.kt @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2026 dexpace and Omar Aljarrah + * + * Licensed under the MIT License. See LICENSE in the project root. + * SPDX-License-Identifier: MIT + */ + +package org.dexpace.sdk.core.http.pipeline.steps + +import org.dexpace.sdk.core.http.common.HttpHeaderName +import java.util.Locale + +/** + * Composable [HttpRetryConditionPredicate] that lets the server steer the retry decision via an + * explicit response header (`X-Should-Retry` by default). + * + * Some APIs annotate responses with a definitive "retry / do not retry" signal that the client + * cannot infer from the status code alone — for example a `409 Conflict` that is genuinely + * transient, or a `503` the server knows will not recover. This predicate honours that signal + * when, and only when, the header is present: + * + * - A **truthy** value (`true`, `1`, `yes`, `retry`, case-insensitive) forces a retry, even for + * a status the default classifier would not retry. + * - A **falsy** value (`false`, `0`, `no`, `stop`, case-insensitive) suppresses a retry, even + * for a status the default classifier would retry. + * - When the header is **absent**, unrecognised, or there is no response (the exception path), + * the decision is delegated to [delegate] — so wiring this predicate in changes behaviour + * only when the server actually speaks. + * + * ## Opt-in + * + * This predicate is **not** installed by default. A caller enables it by passing an instance as + * [HttpRetryOptions.shouldRetryCondition] (and/or [HttpRetryOptions.shouldRetryException]). The + * SDK's default retryable-status set is unchanged — in particular `409 Conflict` stays out of + * it; this predicate is the mechanism by which a server can opt a `409` (or any other status) + * into a retry, rather than widening the default set for everyone. + * + * ## Composition + * + * [delegate] defaults to the SDK's standard classifier, dispatched per path: the response + * classifier (`408 / 429 / 5xx`, except `501` / `505`) on the response path, and the exception + * classifier (`IOException` / `TimeoutException` anywhere in the cause chain) on the exception + * path. Because the override header only appears on responses, wiring this predicate into + * [HttpRetryOptions.shouldRetryException] leaves the exception-path decision entirely to the + * delegate. Supply a different delegate to layer the server override on top of bespoke retry + * logic, or [HttpRetryConditionPredicate] `{ false }` to make the server header the sole + * authority. + * + * ## Thread-safety + * + * Immutable and stateless — safe to share across concurrent requests. + * + * @property headerName The response header consulted for the override signal. Defaults to + * `X-Should-Retry`. + * @property delegate Fallback predicate consulted when the header is absent or unrecognised, or + * on the exception path. Defaults to the standard per-path classifier. + */ +public class ServerOverrideRetryPredicate + @JvmOverloads + constructor( + public val headerName: HttpHeaderName = DEFAULT_HEADER_NAME, + public val delegate: HttpRetryConditionPredicate = + HttpRetryConditionPredicate(::defaultClassifier), + ) : HttpRetryConditionPredicate { + override fun shouldRetry(condition: HttpRetryCondition): Boolean { + val response = condition.response ?: return delegate.shouldRetry(condition) + val raw = response.headers.get(headerName) ?: return delegate.shouldRetry(condition) + return when (raw.trim().lowercase(Locale.US)) { + in TRUTHY -> true + in FALSY -> false + // An unrecognised value is not a directive — defer rather than guess. + else -> delegate.shouldRetry(condition) + } + } + + public companion object { + /** Default override header (`X-Should-Retry`). */ + @JvmField + public val DEFAULT_HEADER_NAME: HttpHeaderName = HttpHeaderName.fromString("X-Should-Retry") + + /** Header values that force a retry. */ + private val TRUTHY: Set = setOf("true", "1", "yes", "retry") + + /** Header values that suppress a retry. */ + private val FALSY: Set = setOf("false", "0", "no", "stop") + } + } + +/** + * Default [ServerOverrideRetryPredicate.delegate]: applies the SDK's standard classification for + * whichever path the [condition] represents — the response classifier when a response is present, + * the exception classifier otherwise. This keeps the override predicate's fall-through behaviour + * identical to the stock [HttpRetryOptions] defaults on both the response and exception paths. + */ +private fun defaultClassifier(condition: HttpRetryCondition): Boolean = + if (condition.response != null) { + defaultShouldRetryResponse(condition) + } else { + defaultShouldRetryException(condition) + } diff --git a/sdk-core/src/main/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetryAfterParser.kt b/sdk-core/src/main/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetryAfterParser.kt index 1323a18e..d475b8f3 100644 --- a/sdk-core/src/main/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetryAfterParser.kt +++ b/sdk-core/src/main/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetryAfterParser.kt @@ -28,7 +28,9 @@ import java.util.concurrent.ThreadLocalRandom * * ## Recognized header forms * - * 1. `Retry-After: ` — RFC 7231 §7.1.3 numeric (delta-seconds) form. + * 1. `Retry-After: ` — RFC 7231 §7.1.3 numeric (delta-seconds) form. Both integer + * and fractional values (e.g. `1.5`) are accepted; the fractional part is honoured to + * nanosecond resolution. * 2. `Retry-After: ` — RFC 7231 §7.1.3 absolute-date form (parsed via * [DateTimeRfc1123], which tolerates an informational weekday per RFC 7231 §7.1.1.1). * 3. `retry-after-ms: ` — millisecond delta variant. @@ -106,6 +108,12 @@ public object RetryAfterParser { */ private val MAX_DELAY: Duration = Duration.ofDays(365) + /** [MAX_DELAY] expressed in whole seconds — the clamp threshold for fractional parsing. */ + private val MAX_DELAY_SECONDS: Double = MAX_DELAY.seconds.toDouble() + + /** Nanoseconds per second — the scale factor for the fractional `Retry-After` conversion. */ + private const val NANOS_PER_SECOND: Double = 1_000_000_000.0 + /** * Parses the next-attempt delay from [headers] relative to [now]. Returns `null` when no * recognized header is present or parseable. @@ -185,14 +193,23 @@ public object RetryAfterParser { } /** - * Parses [value] as a non-negative integer count of seconds. Returns `null` on any parse - * failure, including negative values — the retry layer falls back to its backoff - * schedule rather than retrying immediately against a misbehaving server. + * Parses [value] as a non-negative count of seconds. Accepts both the RFC 7231 §7.1.3 + * integer (delta-seconds) form and the fractional form (`1.5`) that real servers and + * proxies emit; the fractional part is honoured down to nanosecond resolution. + * + * Returns `null` on any parse failure, on a negative value, or on a non-finite value + * (`NaN`, `Infinity`) — the retry layer then falls back to its backoff schedule rather + * than retrying immediately against a misbehaving server. A finite but absurdly large + * value is clamped to [MAX_DELAY] before the nanosecond conversion so the resulting + * [Duration] can never overflow [Duration.toNanos] downstream. */ private fun parseNumericSeconds(value: String): Duration? { - val seconds = value.toLongOrNull() ?: return null - if (seconds < 0L) return null - return Duration.ofSeconds(seconds) + val seconds = value.toDoubleOrNull() ?: return null + if (seconds.isNaN() || seconds.isInfinite() || seconds < 0.0) return null + // Clamp in the seconds domain before converting to nanos: a value beyond the ceiling + // would overflow the `* NANOS_PER_SECOND` multiply and the Long cast below. + if (seconds >= MAX_DELAY_SECONDS) return MAX_DELAY + return Duration.ofNanos((seconds * NANOS_PER_SECOND).toLong()) } /** diff --git a/sdk-core/src/main/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetrySettings.kt b/sdk-core/src/main/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetrySettings.kt index e7654933..d54f5bb7 100644 --- a/sdk-core/src/main/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetrySettings.kt +++ b/sdk-core/src/main/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetrySettings.kt @@ -8,6 +8,7 @@ package org.dexpace.sdk.core.pipeline.step.retry import org.dexpace.sdk.core.generics.Builder +import org.dexpace.sdk.core.http.common.HttpHeaderName import org.dexpace.sdk.core.http.request.Method import java.time.Duration import java.util.Collections @@ -37,6 +38,7 @@ private val MAX_NANO_REPRESENTABLE_DELAY: Duration = Duration.ofNanos(Long.MAX_V * - [retryableMethods] = `{GET, HEAD, OPTIONS, PUT, DELETE}` — safe-by-method per RFC 9110. * `POST`/`PATCH`/etc. retry only when the request body is replayable. * - [scheduler] = `null` — fall back to the lazy daemon scheduler created by [RetryStep]. + * - [attemptHeaderName] = `null` — no per-attempt header is stamped (opt-in; see the property). * * ## Thread-safety * @@ -61,6 +63,12 @@ private val MAX_NANO_REPRESENTABLE_DELAY: Duration = Duration.ofNanos(Long.MAX_V * RFC). Non-idempotent methods (`POST`, `PATCH`) only retry when the body is replayable. * @property scheduler Optional caller-provided scheduler. When `null` [RetryStep] uses a * process-wide lazy daemon scheduler. + * @property attemptHeaderName Optional request header stamped on each attempt [RetryStep] + * dispatches, carrying the 1-based attempt ordinal (`1` for the original send, `2` for the + * first retry, and so on) so servers and proxies can observe the retry count. `null` (the + * default) disables the header entirely. The header is set on a per-attempt copy of the + * request, never on the immutable template. Any idempotency key the caller stamps stays + * stable across retries — only this attempt header changes per send. */ public class RetrySettings // The 9-arg constructor lives behind a `private` modifier — public construction goes @@ -78,6 +86,7 @@ public class RetrySettings public val retryableStatuses: Set, public val retryableMethods: Set, public val scheduler: ScheduledExecutorService?, + public val attemptHeaderName: HttpHeaderName?, ) { /** Returns a fresh [RetrySettingsBuilder] preloaded with this instance's values. */ public fun newBuilder(): RetrySettingsBuilder = RetrySettingsBuilder(this) @@ -96,6 +105,7 @@ public class RetrySettings private var retryableStatuses: Set = DEFAULT_RETRYABLE_STATUSES private var retryableMethods: Set = DEFAULT_RETRYABLE_METHODS private var scheduler: ScheduledExecutorService? = null + private var attemptHeaderName: HttpHeaderName? = null /** Creates an empty builder populated with the SDK defaults. */ public constructor() @@ -111,6 +121,7 @@ public class RetrySettings this.retryableStatuses = settings.retryableStatuses this.retryableMethods = settings.retryableMethods this.scheduler = settings.scheduler + this.attemptHeaderName = settings.attemptHeaderName } /** Sets [RetrySettings.totalTimeout]. Must be non-negative. */ @@ -186,6 +197,16 @@ public class RetrySettings this.scheduler = scheduler } + /** + * Sets [RetrySettings.attemptHeaderName]. When non-null, [RetryStep] stamps this + * header (carrying the 1-based attempt ordinal) on each attempt's request copy. + * `null` (the default) leaves attempts unstamped. + */ + public fun attemptHeaderName(attemptHeaderName: HttpHeaderName?): RetrySettingsBuilder = + apply { + this.attemptHeaderName = attemptHeaderName + } + /** Builds the immutable [RetrySettings] instance. */ override fun build(): RetrySettings = RetrySettings( @@ -198,6 +219,7 @@ public class RetrySettings retryableStatuses = Collections.unmodifiableSet(LinkedHashSet(retryableStatuses)), retryableMethods = Collections.unmodifiableSet(LinkedHashSet(retryableMethods)), scheduler = scheduler, + attemptHeaderName = attemptHeaderName, ) } diff --git a/sdk-core/src/main/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetryStep.kt b/sdk-core/src/main/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetryStep.kt index 8d50c8ec..f5b2e030 100644 --- a/sdk-core/src/main/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetryStep.kt +++ b/sdk-core/src/main/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetryStep.kt @@ -56,6 +56,14 @@ import java.util.concurrent.TimeUnit * is a deliberate departure from Square's `RetryInterceptor`, which retries on status alone and * silently double-sends a body it cannot replay on transport timeouts. * + * ## Per-attempt header + * + * When [RetrySettings.attemptHeaderName] is configured (it is `null` by default), every send is + * stamped with that header carrying the 1-based attempt ordinal (`1` for the original send, `2` + * for the first retry, …) on a fresh per-attempt copy of the request, so servers and proxies can + * observe the retry count. The header is set on a copy via [Request.newBuilder]; the captured + * template is never mutated, and any idempotency key already on the request is left untouched. + * * ## Cancellation * * Waits between attempts use [CompletableFuture.get] backed by [scheduler] — never @@ -142,9 +150,11 @@ public class RetryStep */ @Throws(Throwable::class) public fun attempt(): Response { + // The original send is attempt ordinal 1 — stamp it when the feature is enabled so + // the very first request carries the same observable counter the retries will. val initial: ResponseOutcome = try { - ResponseOutcome.Success(httpClient.execute(request)) + ResponseOutcome.Success(httpClient.execute(stampAttempt(request, attemptOrdinal = 1))) } catch (t: Throwable) { ResponseOutcome.Failure(t) } @@ -172,7 +182,9 @@ public class RetryStep is AttemptStep.Abort -> return ResponseOutcome.Failure(readyState.error) is AttemptStep.Proceed -> { state.attempt += 1 - val outcome = executeOnce() + // state.attempt is now the 1-based ordinal of the send about to happen: + // the original was 1, so the first retry dispatched here is 2. + val outcome = executeOnce(state.attempt) if (outcome is ResponseOutcome.Success) return outcome val nextError = (outcome as ResponseOutcome.Failure).error if (!isClassifiedRetryable(nextError)) return outcome @@ -268,10 +280,13 @@ public class RetryStep * Executes the request once via the captured transport, converting any throwable * raised by the transport into a [ResponseOutcome.Failure]. Preserves the interrupt * flag if the transport raises an [InterruptedException] mid-send. + * + * @param attemptOrdinal The 1-based ordinal of this send, stamped onto the per-attempt + * request copy when [RetrySettings.attemptHeaderName] is configured. */ - private fun executeOnce(): ResponseOutcome = + private fun executeOnce(attemptOrdinal: Int): ResponseOutcome = try { - ResponseOutcome.Success(httpClient.execute(request)) + ResponseOutcome.Success(httpClient.execute(stampAttempt(request, attemptOrdinal))) } catch (e: InterruptedException) { Thread.currentThread().interrupt() ResponseOutcome.Failure(e) @@ -279,6 +294,25 @@ public class RetryStep ResponseOutcome.Failure(t) } + /** + * Returns a per-attempt copy of [request] carrying the configured + * [RetrySettings.attemptHeaderName] set to [attemptOrdinal]. When no attempt header is + * configured the original [request] is returned unchanged — the no-op path allocates + * nothing, so the common (feature-off) case pays no cost. The returned copy is built via + * [Request.newBuilder] so the immutable template the step captured is never mutated; any + * idempotency key the caller stamped is preserved verbatim because only this single + * header is replaced. + */ + private fun stampAttempt( + request: Request, + attemptOrdinal: Int, + ): Request { + val header = settings.attemptHeaderName ?: return request + return request.newBuilder() + .setHeader(header.caseSensitiveName, attemptOrdinal.toString()) + .build() + } + /** * Blocks the calling thread for [delay] without pinning a carrier thread under Loom. * Uses [ScheduledExecutorService.schedule] + [CompletableFuture.get] so the wait can diff --git a/sdk-core/src/test/kotlin/org/dexpace/sdk/core/http/pipeline/steps/RetryStepTest.kt b/sdk-core/src/test/kotlin/org/dexpace/sdk/core/http/pipeline/steps/RetryStepTest.kt index 12fd9b1b..7fa2c3c4 100644 --- a/sdk-core/src/test/kotlin/org/dexpace/sdk/core/http/pipeline/steps/RetryStepTest.kt +++ b/sdk-core/src/test/kotlin/org/dexpace/sdk/core/http/pipeline/steps/RetryStepTest.kt @@ -1038,6 +1038,142 @@ class RetryStepTest { assertTrue(closes.get() >= 1, "retryable response must be closed when the delay override throws") } + // ----------------- ServerOverrideRetryPredicate (X-Should-Retry) ----------------- + + @Test + fun `server override is not consulted by default`() { + // Off-by-default: the stock options never look at X-Should-Retry, so a 200 carrying + // `X-Should-Retry: true` is still returned without a retry. + val fake = + FakeHttpClient() + .enqueue { status(200).header("X-Should-Retry", "true") } + + val pipeline = retryPipeline(fake) + val response = pipeline.send(getRequest()) + assertEquals(200, response.status.code) + assertEquals(1, fake.callCount) + } + + @Test + fun `server override forces a retry on an otherwise non-retryable status`() { + // 404 is not in the default retryable set, but an explicit server signal forces a retry + // when the predicate is wired in. + val opts = + HttpRetryOptions( + maxRetries = 3, + shouldRetryCondition = ServerOverrideRetryPredicate(), + ) + val fake = + FakeHttpClient() + .enqueue { status(404).header("X-Should-Retry", "true") } + .enqueue { status(200) } + + val pipeline = + HttpPipelineBuilder(fake) + .append(DefaultRetryStep(opts, zeroDelayClock())) + .build() + + val response = pipeline.send(getRequest()) + assertEquals(200, response.status.code) + assertEquals(2, fake.callCount) + } + + @Test + fun `server override suppresses a retry on an otherwise retryable status`() { + // 503 is normally retried, but the server explicitly tells us not to. + val opts = + HttpRetryOptions( + maxRetries = 3, + shouldRetryCondition = ServerOverrideRetryPredicate(), + ) + val fake = + FakeHttpClient() + .enqueue { status(503).header("X-Should-Retry", "false") } + + val pipeline = + HttpPipelineBuilder(fake) + .append(DefaultRetryStep(opts, zeroDelayClock())) + .build() + + val response = pipeline.send(getRequest()) + assertEquals(503, response.status.code) + assertEquals(1, fake.callCount) + } + + @Test + fun `server override falls back to the wrapped predicate when the header is absent`() { + // No X-Should-Retry header → defer to the delegate (here, the default classifier), + // so a 503 still retries and a 404 still does not. + val opts = + HttpRetryOptions( + maxRetries = 3, + shouldRetryCondition = ServerOverrideRetryPredicate(), + ) + val fake = + FakeHttpClient() + .enqueue { status(503) } + .enqueue { status(200) } + + val pipeline = + HttpPipelineBuilder(fake) + .append(DefaultRetryStep(opts, zeroDelayClock())) + .build() + + val response = pipeline.send(getRequest()) + assertEquals(200, response.status.code) + assertEquals(2, fake.callCount) + } + + @Test + fun `server override honours a configurable header name`() { + val opts = + HttpRetryOptions( + maxRetries = 3, + shouldRetryCondition = + ServerOverrideRetryPredicate(headerName = HttpHeaderName.fromString("X-Retry-Me")), + ) + val fake = + FakeHttpClient() + .enqueue { status(404).header("X-Retry-Me", "true") } + .enqueue { status(200) } + + val pipeline = + HttpPipelineBuilder(fake) + .append(DefaultRetryStep(opts, zeroDelayClock())) + .build() + + val response = pipeline.send(getRequest()) + assertEquals(200, response.status.code) + assertEquals(2, fake.callCount) + } + + @Test + fun `server override defers on the exception path`() { + // The header lives on responses; on the exception path the predicate defers to the + // delegate. With the default delegate, a retryable IOException still retries. + val opts = + HttpRetryOptions( + maxRetries = 3, + shouldRetryException = ServerOverrideRetryPredicate(), + ) + val attempts = AtomicInteger(0) + val client = + object : HttpClient { + override fun execute(request: Request): Response { + if (attempts.getAndIncrement() == 0) throw IOException("transient") + return okResponse(request) + } + } + val pipeline = + HttpPipelineBuilder(client) + .append(DefaultRetryStep(opts, zeroDelayClock())) + .build() + + val response = pipeline.send(getRequest()) + assertEquals(200, response.status.code) + assertEquals(2, attempts.get()) + } + // ----------------- Error propagation ----------------- @Test diff --git a/sdk-core/src/test/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetryAfterParserTest.kt b/sdk-core/src/test/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetryAfterParserTest.kt index dd714195..018e54b4 100644 --- a/sdk-core/src/test/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetryAfterParserTest.kt +++ b/sdk-core/src/test/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetryAfterParserTest.kt @@ -58,6 +58,50 @@ class RetryAfterParserTest { assertEquals(Duration.ofSeconds(7), RetryAfterParser.parse(headers("Retry-After" to " 7 "), now)) } + @Test + fun `fractional Retry-After parses as sub-second delay`() { + // RFC 7231 delta-seconds is integer-only, but real servers and proxies emit `1.5`. + // 1.5 s == 1_500 ms. + assertEquals(Duration.ofMillis(1500), RetryAfterParser.parse(headers("Retry-After" to "1.5"), now)) + } + + @Test + fun `fractional Retry-After below one second parses`() { + // 0.25 s == 250 ms — the integer parser would have rejected this entirely. + assertEquals(Duration.ofMillis(250), RetryAfterParser.parse(headers("Retry-After" to "0.25"), now)) + } + + @Test + fun `fractional Retry-After negative returns null`() { + // The existing negative clamp must survive the switch to fractional parsing. + assertNull(RetryAfterParser.parse(headers("Retry-After" to "-1.5"), now)) + } + + @Test + fun `fractional Retry-After NaN returns null`() { + assertNull(RetryAfterParser.parse(headers("Retry-After" to "NaN"), now)) + } + + @Test + fun `fractional Retry-After infinity returns null`() { + assertNull(RetryAfterParser.parse(headers("Retry-After" to "Infinity"), now)) + } + + @Test + fun `fractional Retry-After far-future value is clamped to 365 days`() { + // A value larger than the 365-day ceiling must clamp, not overflow toNanos(). + val tenYearsSeconds = Duration.ofDays(3650).seconds.toString() + assertEquals(Duration.ofDays(365), RetryAfterParser.parse(headers("Retry-After" to tenYearsSeconds), now)) + } + + @Test + fun `parseHeaderValue dispatches fractional Retry-After to sub-second delay`() { + assertEquals( + Duration.ofMillis(2500), + RetryAfterParser.parseHeaderValue(HttpHeaderName.RETRY_AFTER, "2.5", now), + ) + } + // endregion // region -- HTTP-date Retry-After -- @@ -278,10 +322,13 @@ class RetryAfterParserTest { } @Test - fun `numeric Retry-After of Long MAX_VALUE does not throw`() { - // Duration.ofSeconds(Long.MAX_VALUE) is representable; the downstream consumer clamps. + fun `numeric Retry-After of Long MAX_VALUE clamps to 365 days`() { + // The numeric form now shares the 365-day ceiling applied to the HTTP-date and + // Unix-epoch forms: a far-future delta is clamped before the nanosecond conversion so + // a later Duration.toNanos() can never overflow. No real pacing header asks a client to + // wait centuries, so clamping here loses nothing operationally. val result = RetryAfterParser.parse(headers("Retry-After" to Long.MAX_VALUE.toString()), now) - assertEquals(Duration.ofSeconds(Long.MAX_VALUE), result) + assertEquals(Duration.ofDays(MAX_CLAMP_DAYS), result) } // endregion diff --git a/sdk-core/src/test/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetryStepTest.kt b/sdk-core/src/test/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetryStepTest.kt index 0b5b3ec3..62970e36 100644 --- a/sdk-core/src/test/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetryStepTest.kt +++ b/sdk-core/src/test/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetryStepTest.kt @@ -9,6 +9,7 @@ package org.dexpace.sdk.core.pipeline.step.retry import org.dexpace.sdk.core.client.HttpClient import org.dexpace.sdk.core.http.common.Headers +import org.dexpace.sdk.core.http.common.HttpHeaderName import org.dexpace.sdk.core.http.common.Protocol import org.dexpace.sdk.core.http.request.Method import org.dexpace.sdk.core.http.request.Request @@ -21,6 +22,7 @@ import org.dexpace.sdk.core.http.response.exception.NetworkException import org.dexpace.sdk.core.pipeline.ResponseOutcome import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.api.Assertions.assertFalse +import org.junit.jupiter.api.Assertions.assertNull import org.junit.jupiter.api.Assertions.assertSame import org.junit.jupiter.api.Assertions.assertThrows import org.junit.jupiter.api.Assertions.assertTrue @@ -597,12 +599,115 @@ class RetryStepTest { // endregion + // region -- per-attempt retry-count header -- + + @Test + fun `attempt header is absent by default`() { + // Opt-in feature: with the default settings no attempt header is stamped on retries. + val client = + FakeClient( + listOf( + Canned.Err(httpException(SC_SERVICE_UNAVAILABLE)), + Canned.Ok(response(SC_OK)), + ), + ) + val step = RetryStep(client, zeroDelaySettings(InstantScheduler()), requestGet()) + val out = step.invoke(ResponseOutcome.Failure(httpException(SC_SERVICE_UNAVAILABLE))) + assertTrue(out is ResponseOutcome.Success) + // Two retried sends were made; neither carries the attempt header. + assertEquals(2, client.calls.size) + client.calls.forEach { call -> + assertNull(call.headers.get(DEFAULT_ATTEMPT_HEADER), "no attempt header should be stamped by default") + } + } + + @Test + fun `attempt header is stamped with the attempt ordinal on each send via attempt()`() { + // attempt() drives the initial send too, so we can observe the full ordinal sequence: + // the original send is attempt 1, the first retry is 2, the second retry is 3. + val client = + FakeClient( + listOf( + Canned.Err(httpException(SC_SERVICE_UNAVAILABLE)), + Canned.Err(httpException(SC_SERVICE_UNAVAILABLE)), + Canned.Ok(response(SC_OK)), + ), + ) + val settings = + zeroDelaySettings(InstantScheduler()) + .newBuilder() + .attemptHeaderName(DEFAULT_ATTEMPT_HEADER) + .build() + val step = RetryStep(client, settings, requestGet()) + + val response = step.attempt() + assertEquals(SC_OK, response.status.code) + assertEquals(3, client.calls.size) + assertEquals("1", client.calls[0].headers.get(DEFAULT_ATTEMPT_HEADER)) + assertEquals("2", client.calls[1].headers.get(DEFAULT_ATTEMPT_HEADER)) + assertEquals("3", client.calls[2].headers.get(DEFAULT_ATTEMPT_HEADER)) + } + + @Test + fun `attempt header uses the configured header name`() { + val custom = HttpHeaderName.fromString("X-My-Retry-Count") + val client = + FakeClient( + listOf( + Canned.Err(httpException(SC_SERVICE_UNAVAILABLE)), + Canned.Ok(response(SC_OK)), + ), + ) + val settings = + zeroDelaySettings(InstantScheduler()) + .newBuilder() + .attemptHeaderName(custom) + .build() + val step = RetryStep(client, settings, requestGet()) + + val out = step.invoke(ResponseOutcome.Failure(httpException(SC_SERVICE_UNAVAILABLE))) + assertTrue(out is ResponseOutcome.Success) + // invoke() does not control the external initial send (ordinal 1), so the retried sends + // it dispatches start at ordinal 2. Two retries fire here under the configured name. + assertEquals(2, client.calls.size) + assertEquals("2", client.calls[0].headers.get(custom)) + assertEquals("3", client.calls[1].headers.get(custom)) + } + + @Test + fun `attempt header is stamped on a per-attempt copy and does not mutate the template request`() { + val client = + FakeClient( + listOf( + Canned.Err(httpException(SC_SERVICE_UNAVAILABLE)), + Canned.Ok(response(SC_OK)), + ), + ) + val request = requestGet() + val settings = + zeroDelaySettings(InstantScheduler()) + .newBuilder() + .attemptHeaderName(DEFAULT_ATTEMPT_HEADER) + .build() + val step = RetryStep(client, settings, request) + + step.invoke(ResponseOutcome.Failure(httpException(SC_SERVICE_UNAVAILABLE))) + // The retried request copy carries the header; the immutable template never gains it. + assertEquals("2", client.calls[0].headers.get(DEFAULT_ATTEMPT_HEADER)) + assertNull(request.headers.get(DEFAULT_ATTEMPT_HEADER), "template request must stay unmodified") + } + + // endregion + private companion object { // HTTP status code constants for tests. private const val SC_OK = 200 private const val SC_NOT_FOUND = 404 private const val SC_SERVICE_UNAVAILABLE = 503 + // Header used by the per-attempt retry-count tests. + private val DEFAULT_ATTEMPT_HEADER: HttpHeaderName = HttpHeaderName.fromString("X-Retry-Attempt") + private const val DEFAULT_MAX_ATTEMPTS = 3 private const val DEFAULT_MAX_ATTEMPTS_TIMEOUT = 3 From a2713a2589b253d223d723d592f321417daaab99 Mon Sep 17 00:00:00 2001 From: OmarAlJarrah Date: Tue, 16 Jun 2026 16:24:25 +0300 Subject: [PATCH 2/4] fix: unify retry defaults and backoff across both retry stacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The stage-based DefaultRetryStep and the recovery-aware RetryStep shipped divergent retry defaults and two different backoff formulas. DefaultRetryStep used an 800ms base delay, a hard-coded `1L shl tryCount` schedule, and ±5% asymmetric jitter; RetryStep used a 200ms initial delay, a configurable 2.0 multiplier, and 0.2 symmetric jitter via BackoffCalculator. They also disagreed on retryable statuses: RetryUtils (and HttpException.retryable) treat 408 as retryable, but RetrySettings dropped it from the default set, so the two stacks retried different responses. Reconcile both around a single source of truth: - BackoffCalculator is now the only backoff implementation. DefaultRetryStep computes its exponential delay through it (via a RetrySettings view built from the options plus the shared multiplier/jitter constants), with the total-timeout deadline disabled since the stage-based step carries no budget. The duplicate shift-based formula, its jitter helper, and the now dead MAX_SHIFT_TRY_COUNT constant are removed. - The canonical defaults live as public constants on RetrySettings (DEFAULT_DELAY_MULTIPLIER, DEFAULT_JITTER, DEFAULT_MAX_ATTEMPTS, plus the existing delay constants). HttpRetryOptions references them, so its base delay (200ms), max delay (8s), and retry budget can no longer drift. Both stacks now default to 3 total sends (HttpRetryOptions.maxRetries=2, RetrySettings.maxAttempts=3). - 408 is retryable by default everywhere: it is added to RetrySettings.DEFAULT_RETRYABLE_STATUSES, matching RetryUtils and the stage-based classifier. Updates KDoc and docs/pipelines.md to state the shared values, adds tests pinning the defaults and the backoff sequence on both stacks, and refreshes the existing jitter/clamp assertions for the unified ±10% jitter. Regenerates the sdk-core API snapshot for the constant changes. --- docs/pipelines.md | 16 +- sdk-core/api/sdk-core.api | 4 +- .../http/pipeline/steps/DefaultRetryStep.kt | 109 ++++----- .../http/pipeline/steps/HttpRetryOptions.kt | 32 ++- .../core/pipeline/step/retry/RetrySettings.kt | 54 ++++- .../core/http/pipeline/steps/RetryStepTest.kt | 70 +++--- .../retry/RetryDefaultsReconciliationTest.kt | 229 ++++++++++++++++++ .../pipeline/step/retry/RetrySettingsTest.kt | 26 ++ 8 files changed, 412 insertions(+), 128 deletions(-) create mode 100644 sdk-core/src/test/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetryDefaultsReconciliationTest.kt diff --git a/docs/pipelines.md b/docs/pipelines.md index 4130dc23..abc05898 100644 --- a/docs/pipelines.md +++ b/docs/pipelines.md @@ -383,13 +383,21 @@ and carries: | `maxDelay` | `8s` | Cap on the scaled delay | | `maxAttempts` | `3` | Total attempts including the first send; `1` disables retries | | `jitter` | `0.2` | Symmetric jitter fraction in `[0.0, 1.0]` | -| `retryableStatuses` | `{429, 500, 502, 503, 504}` | Status codes that trigger a retry on an `HttpException` | +| `retryableStatuses` | `{408, 429, 500, 502, 503, 504}` | Status codes that trigger a retry on an `HttpException` | | `retryableMethods` | `{GET, HEAD, OPTIONS, PUT, DELETE}` | Methods retryable by RFC 9110; others need a replayable body | | `scheduler` | `null` | Optional caller scheduler; `null` uses a daemon scheduler | -`408` (Request Timeout) is intentionally excluded from the default `retryableStatuses` — a -server-side 408 usually means the client was slow to send and is unlikely to improve on retry. -Callers that disagree can opt in via the builder. +These are the SDK's canonical retry defaults: the stage-based `DefaultRetryStep` (and its +`HttpRetryOptions`) share the same base delay (`200ms`), max delay (`8s`), multiplier (`2.0`), +jitter (`0.2`), retryable-status policy, and total send budget (3 attempts). Both stacks compute +their exponential schedule through the one `BackoffCalculator`, so the two cannot drift apart; +the only intentional difference is that the stage-based step has no `totalTimeout` deadline. +`HttpRetryOptions` counts *retries* (`maxRetries`, default `2`) while `RetrySettings` counts +*total attempts* (`maxAttempts`, default `3`) — both default to the same 3 sends. + +`408` (Request Timeout) **is** retryable by default, matching +`RetryUtils.isRetryable`/`HttpException.retryable` and the stage-based step. Callers wanting a +stricter posture can pass a tighter `retryableStatuses` set to the builder. --- diff --git a/sdk-core/api/sdk-core.api b/sdk-core/api/sdk-core.api index f136a561..12260bc1 100644 --- a/sdk-core/api/sdk-core.api +++ b/sdk-core/api/sdk-core.api @@ -787,7 +787,6 @@ public class org/dexpace/sdk/core/http/pipeline/steps/DefaultRedirectStep : org/ public class org/dexpace/sdk/core/http/pipeline/steps/DefaultRetryStep : org/dexpace/sdk/core/http/pipeline/steps/RetryStep { public static final field Companion Lorg/dexpace/sdk/core/http/pipeline/steps/DefaultRetryStep$Companion; public static final field DEFAULT_MAX_RETRIES I - public static final field MAX_SHIFT_TRY_COUNT I public fun ()V public fun (Lorg/dexpace/sdk/core/http/pipeline/steps/HttpRetryOptions;)V public fun (Lorg/dexpace/sdk/core/http/pipeline/steps/HttpRetryOptions;Lorg/dexpace/sdk/core/util/Clock;)V @@ -2122,7 +2121,10 @@ public final class org/dexpace/sdk/core/pipeline/step/retry/RetryAfterParser { public final class org/dexpace/sdk/core/pipeline/step/retry/RetrySettings { public static final field Companion Lorg/dexpace/sdk/core/pipeline/step/retry/RetrySettings$Companion; + public static final field DEFAULT_DELAY_MULTIPLIER D public static final field DEFAULT_INITIAL_DELAY Ljava/time/Duration; + public static final field DEFAULT_JITTER D + public static final field DEFAULT_MAX_ATTEMPTS I public static final field DEFAULT_MAX_DELAY Ljava/time/Duration; public static final field DEFAULT_RETRYABLE_METHODS Ljava/util/Set; public static final field DEFAULT_RETRYABLE_STATUSES Ljava/util/Set; diff --git a/sdk-core/src/main/kotlin/org/dexpace/sdk/core/http/pipeline/steps/DefaultRetryStep.kt b/sdk-core/src/main/kotlin/org/dexpace/sdk/core/http/pipeline/steps/DefaultRetryStep.kt index 9ecb37c3..8b9862aa 100644 --- a/sdk-core/src/main/kotlin/org/dexpace/sdk/core/http/pipeline/steps/DefaultRetryStep.kt +++ b/sdk-core/src/main/kotlin/org/dexpace/sdk/core/http/pipeline/steps/DefaultRetryStep.kt @@ -13,12 +13,13 @@ import org.dexpace.sdk.core.http.request.Method import org.dexpace.sdk.core.http.request.Request import org.dexpace.sdk.core.http.response.Response import org.dexpace.sdk.core.instrumentation.ClientLogger +import org.dexpace.sdk.core.pipeline.step.retry.BackoffCalculator import org.dexpace.sdk.core.pipeline.step.retry.RetryAfterParser +import org.dexpace.sdk.core.pipeline.step.retry.RetrySettings import org.dexpace.sdk.core.util.Clock import java.io.IOException import java.io.InterruptedIOException import java.time.Duration -import java.util.concurrent.ThreadLocalRandom /** * Default [RetryStep]. Drives an iterative retry loop with classified failure detection, @@ -40,9 +41,8 @@ import java.util.concurrent.ThreadLocalRandom * 4. Sleeps via [Clock.sleep]; an interrupt during sleep throws [InterruptedIOException] * with the original [InterruptedException] and any accumulated prior failures attached * as suppressed — retries are NOT resumed after an interrupt. - * 5. Caps `tryCount` at [MAX_SHIFT_TRY_COUNT] before computing `1L shl tryCount` so the - * left-shift can never overflow (the resulting delay is clamped to [HttpRetryOptions.maxDelay] - * anyway, so the cap is invisible to callers). + * 5. Computes the exponential delay through the shared [BackoffCalculator], which saturates + * rather than overflows on extreme attempt counts and clamps to [HttpRetryOptions.maxDelay]. * * ## Body replayability * @@ -64,8 +64,12 @@ import java.util.concurrent.ThreadLocalRandom * parsed from the response (response path only). A negative or unparseable value * falls through; a value of zero produces an immediate retry. * 3. [HttpRetryOptions.fixedDelay] — if set, every retry waits exactly this duration. - * 4. Exponential backoff: `baseDelay * (1L shl tryCount)` clamped to `maxDelay`, with - * ±5% random jitter via [ThreadLocalRandom]. + * 4. Exponential backoff computed by the shared [BackoffCalculator]: + * `baseDelay * 2.0^tryCount` clamped to `maxDelay`, with symmetric ±10% jitter + * ([RetrySettings.DEFAULT_JITTER]). This is the same calculator the recovery-aware + * `pipeline.step.retry.RetryStep` uses, so both stacks share one backoff formula and one + * set of defaults. The deadline-shrinking the calculator also offers is disabled here + * (this stage-based step carries no total-timeout budget). * * ## Failure handling * @@ -141,6 +145,24 @@ public open class DefaultRetryStep */ private val options: HttpRetryOptions = clampOptions(options) + /** + * The [options]' exponential parameters expressed as a [RetrySettings] view so the shared + * [BackoffCalculator] can compute this stack's schedule. Built once per step instance: + * - `initialDelay` / `maxDelay` come from the options. + * - `delayMultiplier` (2.0) and `jitter` (0.2) are the canonical shared constants — the + * options object does not expose its own multiplier/jitter, so the SDK defaults apply. + * - `totalTimeout = ZERO` disables the deadline cap: the stage-based step has no budget. + * The `fixedDelay` path never consults this view; it short-circuits in [backoffOrFixed]. + */ + private val backoffSettings: RetrySettings = + RetrySettings.builder() + .initialDelay(this.options.baseDelay) + .maxDelay(this.options.maxDelay) + .delayMultiplier(RetrySettings.DEFAULT_DELAY_MULTIPLIER) + .jitter(RetrySettings.DEFAULT_JITTER) + .totalTimeout(Duration.ZERO) + .build() + /** * Sends [request] through the downstream pipeline with automatic retry on retryable failures. * @@ -429,51 +451,16 @@ public open class DefaultRetryStep } /** - * Returns [HttpRetryOptions.fixedDelay] if set, otherwise the exponential-backoff - * delay for [tryCount]. The shift count is capped at [MAX_SHIFT_TRY_COUNT] so the - * `1L shl tryCount` term never overflows; the result is always clamped to [HttpRetryOptions.maxDelay] - * anyway, so the cap is invisible in practice. - */ - private fun backoffOrFixed(tryCount: Int): Duration = options.fixedDelay ?: exponentialBackoff(tryCount) - - /** - * `baseDelay * (1L shl tryCount)` clamped to `maxDelay`, plus a ±5% jitter sampled - * from [ThreadLocalRandom]. Pure function of [tryCount] and the configured options. - */ - private fun exponentialBackoff(tryCount: Int): Duration { - val baseNanos = options.baseDelay.toNanos() - if (baseNanos == 0L) return Duration.ZERO - val maxNanos = options.maxDelay.toNanos() - val safeShift = tryCount.coerceAtMost(MAX_SHIFT_TRY_COUNT) - // 1L shl 30 ~= 1e9 — multiplying by 800ms (8e8 ns) overflows. Cap on the long - // multiply itself: if `baseNanos * (1L shl safeShift)` would overflow, clamp. - val multiplier = 1L shl safeShift - val scaled = - if (baseNanos > 0 && multiplier > Long.MAX_VALUE / baseNanos) { - Long.MAX_VALUE - } else { - baseNanos * multiplier - } - val clamped = scaled.coerceAtMost(maxNanos) - val jittered = applyJitter(clamped) - // Guarantee a non-negative result — jitter could push us under zero if the caller - // configured pathological options (e.g. baseDelay equal to negative epsilon). - return Duration.ofNanos(jittered.coerceAtLeast(0L)) - } - - /** - * Applies a ±5% jitter to [nanos]. Sample is drawn from [ThreadLocalRandom] which is - * per-thread, so there is no cross-thread contention on the retry hot path. + * Returns [HttpRetryOptions.fixedDelay] if set, otherwise the exponential-backoff delay + * for [tryCount]. The backoff is computed by the shared [BackoffCalculator] from + * [backoffSettings] so this stack and the recovery-aware `RetryStep` share one formula. + * + * [tryCount] is 0-indexed here (`0` = the delay before the first retry), whereas + * [BackoffCalculator.computeDelay] is 1-indexed (`1` = first retry); the `+ 1` bridges + * the two so both produce `baseDelay`, `2·baseDelay`, `4·baseDelay`, … capped at `maxDelay`. */ - private fun applyJitter(nanos: Long): Long { - if (nanos == 0L) return 0L - // 5% of nanos, used as the magnitude bound on the random sample. - val jitterMagnitude = nanos / JITTER_DIVISOR - if (jitterMagnitude == 0L) return nanos - // ThreadLocalRandom.nextLong(origin, bound) is inclusive of origin, exclusive of bound. - val offset = ThreadLocalRandom.current().nextLong(-jitterMagnitude, jitterMagnitude + 1L) - return nanos + offset - } + private fun backoffOrFixed(tryCount: Int): Duration = + options.fixedDelay ?: BackoffCalculator.computeDelay(tryCount + 1, backoffSettings) // --------------- Retry-After parsing --------------- @@ -573,22 +560,14 @@ public open class DefaultRetryStep public companion object { /** - * Default [HttpRetryOptions.maxRetries] applied when the caller passes a negative - * value. Matches Azure Core's `RetryOptions` default. + * Default retry count applied when the caller passes a negative + * [HttpRetryOptions.maxRetries], and the value baked into the no-arg + * [HttpRetryOptions] default. `2` retries on top of the initial send is the SDK's + * canonical budget — `initial + DEFAULT_MAX_RETRIES == 3`, matching + * [RetrySettings.DEFAULT_MAX_ATTEMPTS] so both retry stacks default to the same + * number of total sends. */ - public const val DEFAULT_MAX_RETRIES: Int = 3 - - /** - * Upper bound on `tryCount` used for the `1L shl tryCount` term in - * [DefaultRetryStep.exponentialBackoff]. `1L shl 30` ~= 1.07e9 — the scaled delay is - * always clamped to [HttpRetryOptions.maxDelay] long before this bound is hit, so the - * cap is a paranoid guard against integer overflow rather than a behavior knob. - */ - public const val MAX_SHIFT_TRY_COUNT: Int = 30 - - // Jitter is ±5% of the computed delay; expressed as the divisor (nanos / 20) to - // avoid an extra multiplication on the hot path. See [applyJitter]. - private const val JITTER_DIVISOR = 20L + public const val DEFAULT_MAX_RETRIES: Int = 2 // Nanoseconds in one millisecond — used to convert monotonic-clock deltas to ms // for retry log events. diff --git a/sdk-core/src/main/kotlin/org/dexpace/sdk/core/http/pipeline/steps/HttpRetryOptions.kt b/sdk-core/src/main/kotlin/org/dexpace/sdk/core/http/pipeline/steps/HttpRetryOptions.kt index 0936712d..8248ac25 100644 --- a/sdk-core/src/main/kotlin/org/dexpace/sdk/core/http/pipeline/steps/HttpRetryOptions.kt +++ b/sdk-core/src/main/kotlin/org/dexpace/sdk/core/http/pipeline/steps/HttpRetryOptions.kt @@ -8,6 +8,7 @@ package org.dexpace.sdk.core.http.pipeline.steps import org.dexpace.sdk.core.http.common.HttpHeaderName +import org.dexpace.sdk.core.pipeline.step.retry.RetrySettings import org.dexpace.sdk.core.util.RetryUtils import java.time.Duration import java.util.Collections @@ -40,12 +41,15 @@ public fun interface HttpRetryDelayProvider { } /** - * Configuration for [DefaultRetryStep]. Defaults mirror Azure Core's retry policy: - * - [maxRetries] = 3 - * - [baseDelay] = 800ms (exponentially scaled per attempt) - * - [maxDelay] = 8s (cap on the scaled delay) + * Configuration for [DefaultRetryStep]. The numeric defaults are the SDK's canonical retry + * defaults, shared with the recovery-aware [RetrySettings] so both retry stacks compute the + * same backoff schedule (via [org.dexpace.sdk.core.pipeline.step.retry.BackoffCalculator]): + * - [maxRetries] = 2 (initial send + 2 retries = 3 total attempts, matching + * [RetrySettings.DEFAULT_MAX_ATTEMPTS]). + * - [baseDelay] = 200ms (= [RetrySettings.DEFAULT_INITIAL_DELAY]; exponentially scaled per attempt). + * - [maxDelay] = 8s (= [RetrySettings.DEFAULT_MAX_DELAY]; cap on the scaled delay). * - [fixedDelay] = null (exponential backoff is used; when non-null it overrides the - * backoff entirely and every retry waits exactly [fixedDelay]) + * backoff entirely and every retry waits exactly [fixedDelay]). * - [retryAfterHeaders] = `Retry-After`, `retry-after-ms`, `x-ms-retry-after-ms` — * parsed in declared order; the first present wins. Drop the Microsoft-specific * variants by passing a tighter list for stricter posture. @@ -54,15 +58,20 @@ public fun interface HttpRetryDelayProvider { * anywhere in the cause chain, per [RetryUtils.isRetryable]. * - [delayFromCondition] = null delay (falls through to `Retry-After` parsing, then backoff). * + * The exponential schedule, multiplier (2.0), and symmetric jitter (0.2) are sourced from the + * shared [RetrySettings] constants and applied by `BackoffCalculator`, so this stack and the + * recovery-aware stack cannot drift apart. The one intentional difference is the deadline: this + * stage-based step has no total-timeout budget, so it never shrinks a delay against one. + * * The companion [HttpRetryOptions.fixed] factory builds an options instance whose delay * never grows — useful for test injection or high-throughput retry against flaky endpoints. */ public class HttpRetryOptions @JvmOverloads constructor( - public val maxRetries: Int = 3, - public val baseDelay: Duration = Duration.ofMillis(DEFAULT_BASE_DELAY_MS), - public val maxDelay: Duration = Duration.ofSeconds(DEFAULT_MAX_DELAY_SECONDS), + public val maxRetries: Int = DEFAULT_MAX_RETRIES, + public val baseDelay: Duration = RetrySettings.DEFAULT_INITIAL_DELAY, + public val maxDelay: Duration = RetrySettings.DEFAULT_MAX_DELAY, public val fixedDelay: Duration? = null, public val retryAfterHeaders: List = DEFAULT_RETRY_AFTER_HEADERS, public val shouldRetryCondition: HttpRetryConditionPredicate = @@ -72,10 +81,9 @@ public class HttpRetryOptions public val delayFromCondition: HttpRetryDelayProvider = HttpRetryDelayProvider { null }, ) { public companion object { - // Default exponential-backoff parameters tuned to favour fast first-retry while - // bounding cumulative latency. Aligned with Azure Core's RetryOptions defaults. - private const val DEFAULT_BASE_DELAY_MS = 800L - private const val DEFAULT_MAX_DELAY_SECONDS = 8L + // The default retry count is the canonical SDK budget, kept in one place on + // DefaultRetryStep (initial send + DEFAULT_MAX_RETRIES == RetrySettings.DEFAULT_MAX_ATTEMPTS). + private const val DEFAULT_MAX_RETRIES = DefaultRetryStep.DEFAULT_MAX_RETRIES /** * The three `Retry-After` header forms parsed by [DefaultRetryStep]. Order matters — diff --git a/sdk-core/src/main/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetrySettings.kt b/sdk-core/src/main/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetrySettings.kt index d54f5bb7..26c1e124 100644 --- a/sdk-core/src/main/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetrySettings.kt +++ b/sdk-core/src/main/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetrySettings.kt @@ -28,13 +28,17 @@ private val MAX_NANO_REPRESENTABLE_DELAY: Duration = Duration.ofNanos(Long.MAX_V * timeout, capped `maxDelay`, `maxAttempts`. * * ## Defaults (Square + gax tuned) + * + * These are the canonical SDK retry defaults, shared with the stage-based + * [DefaultRetryStep][org.dexpace.sdk.core.http.pipeline.steps.DefaultRetryStep] so both retry + * stacks compute the same backoff schedule and honour the same retryable-status policy. * - [totalTimeout] = 30 s — hard budget for all attempts combined. * - [initialDelay] = 200 ms — first retry waits this long (subject to jitter). * - [delayMultiplier] = 2.0 — each subsequent delay is multiplied by this factor. * - [maxDelay] = 8 s — cap on the scaled delay; further attempts plateau here. * - [maxAttempts] = 3 — total attempts including the initial call (matches Square's default). * - [jitter] = 0.2 — symmetric jitter fraction. Effective delay ∈ `[delay*(1−j/2), delay*(1+j/2)]`. - * - [retryableStatuses] = `{429, 500, 502, 503, 504}` — canonical retryable codes. + * - [retryableStatuses] = `{408, 429, 500, 502, 503, 504}` — canonical retryable codes. * - [retryableMethods] = `{GET, HEAD, OPTIONS, PUT, DELETE}` — safe-by-method per RFC 9110. * `POST`/`PATCH`/etc. retry only when the request body is replayable. * - [scheduler] = `null` — fall back to the lazy daemon scheduler created by [RetryStep]. @@ -227,36 +231,61 @@ public class RetrySettings // Defaults — see class kdoc for rationale. private const val DEFAULT_TOTAL_TIMEOUT_SECONDS = 30L private const val DEFAULT_INITIAL_DELAY_MS = 200L - private const val DEFAULT_DELAY_MULTIPLIER = 2.0 private const val DEFAULT_MAX_DELAY_SECONDS = 8L - private const val DEFAULT_MAX_ATTEMPTS = 3 - private const val DEFAULT_JITTER = 0.2 + + /** + * Canonical exponential-backoff multiplier shared by both retry stacks: each delay is + * `previous * 2.0`. Exposed so the stage-based [DefaultRetryStep][org.dexpace.sdk.core.http.pipeline.steps.DefaultRetryStep] + * and the recovery-aware [RetryStep] compute the same schedule from one constant. + */ + public const val DEFAULT_DELAY_MULTIPLIER: Double = 2.0 + + /** + * Canonical symmetric jitter fraction shared by both retry stacks. The effective delay + * is drawn uniformly from `[delay * (1 - j/2), delay * (1 + j/2)]`, i.e. ±10% at the + * default `0.2`. + */ + public const val DEFAULT_JITTER: Double = 0.2 + + /** + * Canonical retry budget shared by both stacks: **3 total attempts**, i.e. the initial + * send plus 2 retries. The recovery-aware [RetryStep] counts total attempts directly + * via [maxAttempts]; the stage-based step counts retries, so its equivalent default is + * [DefaultRetryStep.DEFAULT_MAX_RETRIES][org.dexpace.sdk.core.http.pipeline.steps.DefaultRetryStep.DEFAULT_MAX_RETRIES] + * `= 2`. Both yield the same 3 sends. + */ + public const val DEFAULT_MAX_ATTEMPTS: Int = 3 /** Default total timeout: 30 s. */ @JvmField public val DEFAULT_TOTAL_TIMEOUT: Duration = Duration.ofSeconds(DEFAULT_TOTAL_TIMEOUT_SECONDS) - /** Default initial delay: 200 ms. */ + /** Default initial delay: 200 ms. Shared with the stage-based step's base delay. */ @JvmField public val DEFAULT_INITIAL_DELAY: Duration = Duration.ofMillis(DEFAULT_INITIAL_DELAY_MS) - /** Default max delay cap: 8 s. */ + /** Default max delay cap: 8 s. Shared with the stage-based step. */ @JvmField public val DEFAULT_MAX_DELAY: Duration = Duration.ofSeconds(DEFAULT_MAX_DELAY_SECONDS) /** - * Default retryable HTTP statuses: `429` (rate limit), `500` (internal), `502` - * (bad gateway), `503` (service unavailable), `504` (gateway timeout). + * Default retryable HTTP statuses: `408` (request timeout), `429` (rate limit), `500` + * (internal), `502` (bad gateway), `503` (service unavailable), `504` (gateway + * timeout). * - * Note: `408` (Request Timeout) is intentionally NOT in the default set — a 408 - * from a server usually means the client took too long to send the request, - * which is unlikely to improve on retry. Callers that disagree can opt in via - * the builder. + * This is the recovery-aware step's allow-list; [RetryStep] intersects it with + * [HttpException.retryable][org.dexpace.sdk.core.http.response.exception.HttpException.retryable] + * (which is itself derived from [RetryUtils.isRetryable][org.dexpace.sdk.core.util.RetryUtils.isRetryable]). + * With this default set the recovery-aware step retries the same common statuses the + * stage-based [DefaultRetryStep][org.dexpace.sdk.core.http.pipeline.steps.DefaultRetryStep] + * does via `RetryUtils` — `408` included, so the two stacks agree on the 408 stance. + * Callers wanting a stricter posture can pass a tighter set to the builder. */ @JvmField public val DEFAULT_RETRYABLE_STATUSES: Set = Collections.unmodifiableSet( linkedSetOf( + SC_REQUEST_TIMEOUT, SC_TOO_MANY_REQUESTS, SC_INTERNAL_ERROR, SC_BAD_GATEWAY, @@ -277,6 +306,7 @@ public class RetrySettings ) // Spelled-out status constants to satisfy detekt's MagicNumber rule. + private const val SC_REQUEST_TIMEOUT = 408 private const val SC_TOO_MANY_REQUESTS = 429 private const val SC_INTERNAL_ERROR = 500 private const val SC_BAD_GATEWAY = 502 diff --git a/sdk-core/src/test/kotlin/org/dexpace/sdk/core/http/pipeline/steps/RetryStepTest.kt b/sdk-core/src/test/kotlin/org/dexpace/sdk/core/http/pipeline/steps/RetryStepTest.kt index 7fa2c3c4..a3442efc 100644 --- a/sdk-core/src/test/kotlin/org/dexpace/sdk/core/http/pipeline/steps/RetryStepTest.kt +++ b/sdk-core/src/test/kotlin/org/dexpace/sdk/core/http/pipeline/steps/RetryStepTest.kt @@ -295,7 +295,7 @@ class RetryStepTest { fun `Retry-After negative falls back to default backoff`() { val clock = FixedClock() // Configure a deterministic backoff: 100ms base with 100ms cap removes the - // exponential growth window and the jitter range is at most 5ms. + // exponential growth window and the symmetric jitter range is at most ±10ms (±10%). val opts = HttpRetryOptions( baseDelay = Duration.ofMillis(100), @@ -313,10 +313,10 @@ class RetryStepTest { val before = clock.now() pipeline.send(getRequest()) - // 100ms ± 5ms jitter — Duration#compareTo lets us range-check. + // 100ms ± 10ms symmetric jitter — Duration#compareTo lets us range-check. val elapsed = Duration.between(before, clock.now()) - assertTrue(elapsed >= Duration.ofMillis(95), "elapsed=$elapsed below 95ms") - assertTrue(elapsed <= Duration.ofMillis(105), "elapsed=$elapsed above 105ms") + assertTrue(elapsed >= Duration.ofMillis(90), "elapsed=$elapsed below 90ms") + assertTrue(elapsed <= Duration.ofMillis(110), "elapsed=$elapsed above 110ms") } @Test @@ -340,15 +340,15 @@ class RetryStepTest { val before = clock.now() pipeline.send(getRequest()) val elapsed = Duration.between(before, clock.now()) - assertTrue(elapsed >= Duration.ofMillis(95)) - assertTrue(elapsed <= Duration.ofMillis(105)) + assertTrue(elapsed >= Duration.ofMillis(90)) + assertTrue(elapsed <= Duration.ofMillis(110)) } @Test fun `custom retryAfterHeaders list ignores non-listed variants`() { val clock = FixedClock() // Only listen to the standard `Retry-After`; the `retry-after-ms` header is - // therefore invisible and we fall back to backoff (100ms ± 5ms). + // therefore invisible and we fall back to backoff (100ms ± 10ms symmetric jitter). val opts = HttpRetryOptions( baseDelay = Duration.ofMillis(100), @@ -368,8 +368,8 @@ class RetryStepTest { val before = clock.now() pipeline.send(getRequest()) val elapsed = Duration.between(before, clock.now()) - assertTrue(elapsed >= Duration.ofMillis(95), "elapsed=$elapsed below 95ms") - assertTrue(elapsed <= Duration.ofMillis(105), "elapsed=$elapsed above 105ms") + assertTrue(elapsed >= Duration.ofMillis(90), "elapsed=$elapsed below 90ms") + assertTrue(elapsed <= Duration.ofMillis(110), "elapsed=$elapsed above 110ms") } // ----------------- Exception classifier ----------------- @@ -833,7 +833,7 @@ class RetryStepTest { } @Test - fun `exponential backoff grows roughly 100 200 400 with 5pct jitter`() { + fun `exponential backoff grows roughly 100 200 400 with symmetric jitter`() { val clock = FixedClock() val opts = HttpRetryOptions( @@ -855,14 +855,14 @@ class RetryStepTest { val before = clock.now() pipeline.send(getRequest()) - // Expected: 100 + 200 + 400 = 700ms, ± 5% on each → ± 35ms total. + // Expected: 100 + 200 + 400 = 700ms, with symmetric ±10% jitter per term → ± 70ms total. val elapsed = Duration.between(before, clock.now()) - assertTrue(elapsed >= Duration.ofMillis(665), "elapsed=$elapsed below lower bound") - assertTrue(elapsed <= Duration.ofMillis(735), "elapsed=$elapsed above upper bound") + assertTrue(elapsed >= Duration.ofMillis(630), "elapsed=$elapsed below lower bound") + assertTrue(elapsed <= Duration.ofMillis(770), "elapsed=$elapsed above upper bound") } @Test - fun `jitter stays within +- 5 percent across many samples`() { + fun `jitter stays within +- 10 percent across many samples`() { // Drive the same try count repeatedly through the step and check the spread is // within the documented bound. Run via a small reflective-ish hack: invoke the // backoff path indirectly by sending many one-retry sequences and reading the @@ -889,9 +889,10 @@ class RetryStepTest { pipeline.send(getRequest()) samples.add(Duration.between(before, clock.now()).toMillis()) } - // 5% of 1000ms is 50ms. + // Symmetric jitter is ±10% of the delay; half-range of 1000ms is 100ms, so every + // sample lands in [900, 1100]. for (ms in samples) { - assertTrue(ms in 950..1050, "sample $ms outside [950,1050]") + assertTrue(ms in 900..1100, "sample $ms outside [900,1100]") } // Distribution sanity: at least 10% of samples should be on either side of the // mean to confirm the jitter is non-trivially applied. Tight assertion to catch @@ -1312,9 +1313,9 @@ class RetryStepTest { val before = clock.now() pipeline.send(getRequest()) val elapsed = Duration.between(before, clock.now()) - // Within the 5% jitter window of 100ms. - assertTrue(elapsed >= Duration.ofMillis(95), "elapsed=$elapsed below 95ms") - assertTrue(elapsed <= Duration.ofMillis(105), "elapsed=$elapsed above 105ms") + // Within the ±10% symmetric jitter window of 100ms. + assertTrue(elapsed >= Duration.ofMillis(90), "elapsed=$elapsed below 90ms") + assertTrue(elapsed <= Duration.ofMillis(110), "elapsed=$elapsed above 110ms") } @Test @@ -1384,8 +1385,9 @@ class RetryStepTest { val before = clock.now() pipeline.send(getRequest()) val elapsed = Duration.between(before, clock.now()) - assertTrue(elapsed >= Duration.ofMillis(75), "elapsed=$elapsed below 75ms") - assertTrue(elapsed <= Duration.ofMillis(85), "elapsed=$elapsed above 85ms") + // ±10% symmetric jitter on 80ms → [72, 88]. + assertTrue(elapsed >= Duration.ofMillis(72), "elapsed=$elapsed below 72ms") + assertTrue(elapsed <= Duration.ofMillis(88), "elapsed=$elapsed above 88ms") } // ----------------- Exception-side hook coverage ----------------- @@ -1585,13 +1587,13 @@ class RetryStepTest { assertEquals("attempt 2", ex.suppressed[1].message) } - // ----------------- MAX_SHIFT_TRY_COUNT cap ----------------- + // ----------------- Backoff overflow / boundary handling ----------------- @Test fun `baseDelay zero produces an immediate retry through the early-return branch`() { - // `if (baseNanos == 0L) return Duration.ZERO` in exponentialBackoff. The fixed-delay - // tests already drive baseDelay=Duration.ZERO, but this one explicitly exercises the - // exponential path with a zero base — the branch must short-circuit before any shift. + // A zero base delay must yield a zero exponential delay (immediate retry). The shared + // BackoffCalculator returns Duration.ZERO when initialDelay is zero, so the exponential + // path short-circuits to an immediate retry just as the fixed-delay path does. val clock = FixedClock() val opts = HttpRetryOptions( @@ -1616,12 +1618,13 @@ class RetryStepTest { @Test fun `exponential backoff overflow path is clamped to maxDelay rather than wrapping negative`() { - // Drive the overflow check: with baseDelay near Long.MAX_VALUE/2 and tryCount=30, - // baseNanos * (1L shl 30) overflows. The step must clamp to Long.MAX_VALUE and then - // to maxDelay rather than returning a negative duration. + // Drive the saturating-multiply path: with a base delay near the nanosecond ceiling and a + // high attempt count, the unscaled exponential value saturates instead of overflowing. + // The shared BackoffCalculator clamps the saturated value to maxDelay rather than + // returning a negative duration. val clock = FixedClock() - // 9_223_372_036 seconds is well over Long.MAX_VALUE / 2^30 in nanos, forcing the - // overflow branch. + // 9_223_372_036 seconds is just under the Long-nanosecond ceiling, so the scaled value + // saturates to maxDelay on every attempt. val opts = HttpRetryOptions( maxRetries = 35, @@ -1770,13 +1773,12 @@ class RetryStepTest { // ----------------- maxRetries clamping ----------------- @Test - fun `negative maxRetries is clamped to default 3`() { + fun `negative maxRetries is clamped to the default retry count`() { val fake = FakeHttpClient() .enqueue { status(503) } .enqueue { status(503) } .enqueue { status(503) } - .enqueue { status(503) } .enqueue { status(503) } // extra in case clamp went wrong val pipeline = @@ -1786,8 +1788,8 @@ class RetryStepTest { val response = pipeline.send(getRequest()) assertEquals(503, response.status.code) - // Default is 3 retries → 1 original + 3 retries = 4 calls. - assertEquals(4, fake.callCount) + // The clamp applies the canonical DEFAULT_MAX_RETRIES (2) → 1 original + 2 retries = 3 calls. + assertEquals(DefaultRetryStep.DEFAULT_MAX_RETRIES + 1, fake.callCount) } // ----------------- Diagnostic field coverage ----------------- diff --git a/sdk-core/src/test/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetryDefaultsReconciliationTest.kt b/sdk-core/src/test/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetryDefaultsReconciliationTest.kt new file mode 100644 index 00000000..20cab9af --- /dev/null +++ b/sdk-core/src/test/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetryDefaultsReconciliationTest.kt @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2026 dexpace and Omar Aljarrah + * + * Licensed under the MIT License. See LICENSE in the project root. + * SPDX-License-Identifier: MIT + */ + +package org.dexpace.sdk.core.pipeline.step.retry + +import org.dexpace.sdk.core.client.HttpClient +import org.dexpace.sdk.core.http.common.Headers +import org.dexpace.sdk.core.http.pipeline.HttpPipelineBuilder +import org.dexpace.sdk.core.http.pipeline.steps.DefaultRetryStep +import org.dexpace.sdk.core.http.pipeline.steps.HttpRetryOptions +import org.dexpace.sdk.core.http.request.Request +import org.dexpace.sdk.core.http.response.Response +import org.dexpace.sdk.core.http.response.Status +import org.dexpace.sdk.core.io.Io +import org.dexpace.sdk.core.testing.FakeHttpClient +import org.dexpace.sdk.core.testing.FixedClock +import org.dexpace.sdk.io.OkioIoProvider +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.BeforeEach +import org.junit.jupiter.api.Test +import java.time.Duration + +/** + * Guards that the two retry stacks — the stage-based [DefaultRetryStep]/[HttpRetryOptions] and + * the recovery-aware [RetryStep]/[RetrySettings] — share ONE set of documented defaults and ONE + * backoff computation ([BackoffCalculator]). If a future change re-introduces divergent defaults + * (max attempts, base/initial delay, max delay, multiplier, jitter) or a second backoff formula, + * these assertions fail. + */ +class RetryDefaultsReconciliationTest { + @BeforeEach + fun setUp() { + Io.installProvider(OkioIoProvider) + } + + // region -- shared default constants -- + + @Test + fun `both stacks share the same base delay default`() { + assertEquals( + RetrySettings.DEFAULT_INITIAL_DELAY, + HttpRetryOptions().baseDelay, + "stage-based baseDelay default must equal the recovery-aware initialDelay default", + ) + assertEquals(Duration.ofMillis(200), HttpRetryOptions().baseDelay) + } + + @Test + fun `both stacks share the same max delay default`() { + assertEquals( + RetrySettings.DEFAULT_MAX_DELAY, + HttpRetryOptions().maxDelay, + "stage-based maxDelay default must equal the recovery-aware maxDelay default", + ) + assertEquals(Duration.ofSeconds(8), HttpRetryOptions().maxDelay) + } + + @Test + fun `both stacks default to the same total send budget`() { + // The two knobs count differently by design (retries vs total attempts), but the default + // budget must be the same number of sends: initial + DEFAULT_MAX_RETRIES == DEFAULT_MAX_ATTEMPTS. + assertEquals( + RetrySettings.DEFAULT_MAX_ATTEMPTS, + DefaultRetryStep.DEFAULT_MAX_RETRIES + 1, + "default total sends must agree across stacks (1 initial + retries == maxAttempts)", + ) + } + + // endregion + + // region -- shared backoff sequence -- + + /** + * The canonical no-jitter schedule for `initial=100ms, multiplier=2.0, maxDelay=1s`: + * attempt N (1-indexed) → `100ms * 2^(N-1)`, capped at 1s. Indexed by attempt-1. + */ + private val expectedSeriesMs = longArrayOf(100, 200, 400, 800, 1000, 1000) + + @Test + fun `BackoffCalculator produces the canonical exponential sequence`() { + val settings = + RetrySettings.builder() + .initialDelay(Duration.ofMillis(BASE_MS)) + .delayMultiplier(RetrySettings.DEFAULT_DELAY_MULTIPLIER) + .maxDelay(Duration.ofSeconds(1)) + .jitter(0.0) + .totalTimeout(Duration.ZERO) + .maxAttempts(10) + .build() + for (attempt in 1..expectedSeriesMs.size) { + assertEquals( + Duration.ofMillis(expectedSeriesMs[attempt - 1]), + BackoffCalculator.computeDelay(attempt, settings), + "attempt $attempt", + ) + } + } + + @Test + fun `stage-based step follows the same exponential sequence via BackoffCalculator`() { + // Drive the stage-based step and observe the cumulative FixedClock delta across a fixed + // number of retries. The stage-based step now computes its schedule through the shared + // BackoffCalculator with the canonical symmetric jitter (DEFAULT_JITTER = 0.2), so the + // cumulative elapsed must land within the ±10% jitter window of the canonical series sum. + for (retries in 1..4) { + val clock = FixedClock() + val fake = FakeHttpClient() + repeat(retries) { fake.enqueue { status(503) } } + fake.enqueue { status(200) } + + val opts = + HttpRetryOptions( + maxRetries = retries, + baseDelay = Duration.ofMillis(BASE_MS), + maxDelay = Duration.ofSeconds(1), + ) + val pipeline = + HttpPipelineBuilder(fake) + .append(DefaultRetryStep(opts, clock)) + .build() + + val before = clock.now() + pipeline.send(getRequest()) + val elapsedMs = Duration.between(before, clock.now()).toMillis() + + // Cumulative expected = sum of the first `retries` series entries. + var sum = 0L + for (i in 0 until retries) sum += expectedSeriesMs[i] + // Symmetric ±10% jitter (DEFAULT_JITTER = 0.2 → half-range 10%) per term; bound the sum. + val tolerance = (sum * (RetrySettings.DEFAULT_JITTER / 2.0)).toLong() + 1 + val low = sum - tolerance + val high = sum + tolerance + org.junit.jupiter.api.Assertions.assertTrue( + elapsedMs in low..high, + "retries=$retries: elapsed=$elapsedMs not in [$low,$high] (sum=$sum)", + ) + } + } + + // endregion + + // region -- consistent 408 policy -- + + @Test + fun `stage-based step retries a 408 by default`() { + val fake = + FakeHttpClient() + .enqueue { status(408) } + .enqueue { status(200) } + val pipeline = + HttpPipelineBuilder(fake) + .append(DefaultRetryStep(HttpRetryOptions(maxRetries = 2), zeroDelayClock())) + .build() + val response = pipeline.send(getRequest()) + assertEquals(200, response.status.code) + assertEquals(2, fake.callCount, "stage-based step must retry 408") + } + + @Test + fun `recovery-aware default retryable statuses contains 408`() { + org.junit.jupiter.api.Assertions.assertTrue( + RetrySettings.DEFAULT_RETRYABLE_STATUSES.contains(STATUS_REQUEST_TIMEOUT), + "recovery-aware default must agree with the stage-based 408 stance", + ) + } + + @Test + fun `recovery-aware step retries a 408 HttpException by default`() { + val client = + object : HttpClient { + private var calls = 0 + + override fun execute(request: Request): Response { + calls += 1 + if (calls == 1) { + throw object : org.dexpace.sdk.core.http.response.exception.HttpException( + status = Status.REQUEST_TIMEOUT, + headers = Headers.builder().build(), + body = null, + ) {} + } + return Response.builder() + .request(request) + .protocol(org.dexpace.sdk.core.http.common.Protocol.HTTP_1_1) + .status(Status.OK) + .headers(Headers.builder().build()) + .build() + } + } + val settings = + RetrySettings.builder() + .initialDelay(Duration.ZERO) + .maxDelay(Duration.ZERO) + .delayMultiplier(1.0) + .jitter(0.0) + .totalTimeout(Duration.ZERO) + .scheduler(java.util.concurrent.Executors.newSingleThreadScheduledExecutor()) + .build() + val step = RetryStep(client, settings, getRequest()) + val response = step.attempt() + assertEquals(200, response.status.code, "recovery-aware step must retry a 408") + } + + // endregion + + private fun getRequest(): Request = + Request.builder() + .method(org.dexpace.sdk.core.http.request.Method.GET) + .url("https://api.example.com/x") + .build() + + private fun zeroDelayClock(): org.dexpace.sdk.core.util.Clock = + object : org.dexpace.sdk.core.util.Clock { + override fun now(): java.time.Instant = java.time.Instant.EPOCH + + override fun monotonic(): Long = 0L + + override fun sleep(duration: Duration) = Unit + } + + private companion object { + private const val BASE_MS = 100L + private const val STATUS_REQUEST_TIMEOUT = 408 + } +} diff --git a/sdk-core/src/test/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetrySettingsTest.kt b/sdk-core/src/test/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetrySettingsTest.kt index a8ec06e8..574167b8 100644 --- a/sdk-core/src/test/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetrySettingsTest.kt +++ b/sdk-core/src/test/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetrySettingsTest.kt @@ -61,4 +61,30 @@ class RetrySettingsTest { .build() assertEquals(Duration.ofSeconds(8), settings.maxDelay) } + + // --- Canonical shared defaults (one source of truth for both retry stacks) -------------- + + @Test + fun `defaults match the canonical shared constants`() { + val settings = RetrySettings.defaults() + assertEquals(Duration.ofSeconds(30), settings.totalTimeout, "totalTimeout") + assertEquals(RetrySettings.DEFAULT_INITIAL_DELAY, settings.initialDelay, "initialDelay") + assertEquals(Duration.ofMillis(200), settings.initialDelay, "initialDelay value") + assertEquals(RetrySettings.DEFAULT_MAX_DELAY, settings.maxDelay, "maxDelay") + assertEquals(Duration.ofSeconds(8), settings.maxDelay, "maxDelay value") + assertEquals(RetrySettings.DEFAULT_DELAY_MULTIPLIER, settings.delayMultiplier, "delayMultiplier") + assertEquals(2.0, settings.delayMultiplier, "delayMultiplier value") + assertEquals(RetrySettings.DEFAULT_JITTER, settings.jitter, "jitter") + assertEquals(0.2, settings.jitter, "jitter value") + assertEquals(RetrySettings.DEFAULT_MAX_ATTEMPTS, settings.maxAttempts, "maxAttempts") + assertEquals(3, settings.maxAttempts, "maxAttempts value") + } + + @Test + fun `default retryable statuses include 408 and the common retryable codes`() { + // The reconciled stance: 408 IS retryable, matching RetryUtils / HttpException.retryable + // and the stage-based DefaultRetryStep. The two stacks must agree on the 408 question. + val statuses = RetrySettings.DEFAULT_RETRYABLE_STATUSES + assertEquals(setOf(408, 429, 500, 502, 503, 504), statuses.toSet()) + } } From b2f215d2bcecf24524b4f0fb2c30b7f63f52bc42 Mon Sep 17 00:00:00 2001 From: OmarAlJarrah Date: Tue, 16 Jun 2026 22:25:07 +0300 Subject: [PATCH 3/4] test: tidy retry-defaults reconciliation test and document eager delay validation Stop the recovery-aware 408 test from leaking a single-thread scheduled executor: with zero delays the step never schedules a deferred wait, so omit the caller-supplied scheduler and let RetryStep route through its process-wide lazy daemon scheduler (which is never leaked per run). RetryStep does not own or shut down a caller-supplied scheduler, so constructing one inline left a non-daemon thread per test run. Replace the inline fully-qualified references (assertions, Executors, HttpException, Protocol, Method, Clock, Instant) with imports to match the rest of the suite, and add a test pinning that constructing DefaultRetryStep now validates delay magnitudes eagerly. Document that eager validation on the backoffSettings field and smooth the delay-precedence KDoc phrasing. --- .../http/pipeline/steps/DefaultRetryStep.kt | 8 ++- .../retry/RetryDefaultsReconciliationTest.kt | 50 +++++++++++++++---- 2 files changed, 48 insertions(+), 10 deletions(-) diff --git a/sdk-core/src/main/kotlin/org/dexpace/sdk/core/http/pipeline/steps/DefaultRetryStep.kt b/sdk-core/src/main/kotlin/org/dexpace/sdk/core/http/pipeline/steps/DefaultRetryStep.kt index 8b9862aa..c2bcc9a9 100644 --- a/sdk-core/src/main/kotlin/org/dexpace/sdk/core/http/pipeline/steps/DefaultRetryStep.kt +++ b/sdk-core/src/main/kotlin/org/dexpace/sdk/core/http/pipeline/steps/DefaultRetryStep.kt @@ -68,7 +68,7 @@ import java.time.Duration * `baseDelay * 2.0^tryCount` clamped to `maxDelay`, with symmetric ±10% jitter * ([RetrySettings.DEFAULT_JITTER]). This is the same calculator the recovery-aware * `pipeline.step.retry.RetryStep` uses, so both stacks share one backoff formula and one - * set of defaults. The deadline-shrinking the calculator also offers is disabled here + * set of defaults. The deadline-shrinking that the calculator also offers is disabled here * (this stage-based step carries no total-timeout budget). * * ## Failure handling @@ -153,6 +153,12 @@ public open class DefaultRetryStep * options object does not expose its own multiplier/jitter, so the SDK defaults apply. * - `totalTimeout = ZERO` disables the deadline cap: the stage-based step has no budget. * The `fixedDelay` path never consults this view; it short-circuits in [backoffOrFixed]. + * + * Building this view also validates the delay magnitudes eagerly: [RetrySettings.builder] + * rejects a negative `baseDelay`/`maxDelay` and one larger than the calculator's + * ~292-year nanosecond ceiling. [HttpRetryOptions] performs no such range check, so a + * pathological delay surfaces as an [IllegalArgumentException] here, at step construction, + * rather than later at delay-computation time. */ private val backoffSettings: RetrySettings = RetrySettings.builder() diff --git a/sdk-core/src/test/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetryDefaultsReconciliationTest.kt b/sdk-core/src/test/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetryDefaultsReconciliationTest.kt index 20cab9af..8cc532b1 100644 --- a/sdk-core/src/test/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetryDefaultsReconciliationTest.kt +++ b/sdk-core/src/test/kotlin/org/dexpace/sdk/core/pipeline/step/retry/RetryDefaultsReconciliationTest.kt @@ -9,20 +9,27 @@ package org.dexpace.sdk.core.pipeline.step.retry import org.dexpace.sdk.core.client.HttpClient import org.dexpace.sdk.core.http.common.Headers +import org.dexpace.sdk.core.http.common.Protocol import org.dexpace.sdk.core.http.pipeline.HttpPipelineBuilder import org.dexpace.sdk.core.http.pipeline.steps.DefaultRetryStep import org.dexpace.sdk.core.http.pipeline.steps.HttpRetryOptions +import org.dexpace.sdk.core.http.request.Method import org.dexpace.sdk.core.http.request.Request import org.dexpace.sdk.core.http.response.Response import org.dexpace.sdk.core.http.response.Status +import org.dexpace.sdk.core.http.response.exception.HttpException import org.dexpace.sdk.core.io.Io import org.dexpace.sdk.core.testing.FakeHttpClient import org.dexpace.sdk.core.testing.FixedClock +import org.dexpace.sdk.core.util.Clock import org.dexpace.sdk.io.OkioIoProvider import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Assertions.assertThrows +import org.junit.jupiter.api.Assertions.assertTrue import org.junit.jupiter.api.BeforeEach import org.junit.jupiter.api.Test import java.time.Duration +import java.time.Instant /** * Guards that the two retry stacks — the stage-based [DefaultRetryStep]/[HttpRetryOptions] and @@ -134,7 +141,7 @@ class RetryDefaultsReconciliationTest { val tolerance = (sum * (RetrySettings.DEFAULT_JITTER / 2.0)).toLong() + 1 val low = sum - tolerance val high = sum + tolerance - org.junit.jupiter.api.Assertions.assertTrue( + assertTrue( elapsedMs in low..high, "retries=$retries: elapsed=$elapsedMs not in [$low,$high] (sum=$sum)", ) @@ -162,7 +169,7 @@ class RetryDefaultsReconciliationTest { @Test fun `recovery-aware default retryable statuses contains 408`() { - org.junit.jupiter.api.Assertions.assertTrue( + assertTrue( RetrySettings.DEFAULT_RETRYABLE_STATUSES.contains(STATUS_REQUEST_TIMEOUT), "recovery-aware default must agree with the stage-based 408 stance", ) @@ -177,7 +184,7 @@ class RetryDefaultsReconciliationTest { override fun execute(request: Request): Response { calls += 1 if (calls == 1) { - throw object : org.dexpace.sdk.core.http.response.exception.HttpException( + throw object : HttpException( status = Status.REQUEST_TIMEOUT, headers = Headers.builder().build(), body = null, @@ -185,12 +192,15 @@ class RetryDefaultsReconciliationTest { } return Response.builder() .request(request) - .protocol(org.dexpace.sdk.core.http.common.Protocol.HTTP_1_1) + .protocol(Protocol.HTTP_1_1) .status(Status.OK) .headers(Headers.builder().build()) .build() } } + // No explicit scheduler: with zero delays the step never schedules a deferred wait, and + // leaving it null routes through RetryStep's process-wide lazy daemon scheduler rather + // than leaking a caller-owned executor (which RetryStep never shuts down) per run. val settings = RetrySettings.builder() .initialDelay(Duration.ZERO) @@ -198,7 +208,6 @@ class RetryDefaultsReconciliationTest { .delayMultiplier(1.0) .jitter(0.0) .totalTimeout(Duration.ZERO) - .scheduler(java.util.concurrent.Executors.newSingleThreadScheduledExecutor()) .build() val step = RetryStep(client, settings, getRequest()) val response = step.attempt() @@ -207,15 +216,38 @@ class RetryDefaultsReconciliationTest { // endregion + // region -- eager delay-magnitude validation -- + + @Test + fun `constructing the stage-based step validates delay magnitudes eagerly`() { + // HttpRetryOptions does not range-check baseDelay/maxDelay, but routing them through the + // shared RetrySettings (to drive BackoffCalculator) does. A negative delay must therefore + // fail fast at DefaultRetryStep construction, not later at delay-computation time. + assertThrows(IllegalArgumentException::class.java) { + DefaultRetryStep( + HttpRetryOptions(baseDelay = Duration.ofMillis(-1)), + zeroDelayClock(), + ) + } + assertThrows(IllegalArgumentException::class.java) { + DefaultRetryStep( + HttpRetryOptions(maxDelay = Duration.ofMillis(-1)), + zeroDelayClock(), + ) + } + } + + // endregion + private fun getRequest(): Request = Request.builder() - .method(org.dexpace.sdk.core.http.request.Method.GET) + .method(Method.GET) .url("https://api.example.com/x") .build() - private fun zeroDelayClock(): org.dexpace.sdk.core.util.Clock = - object : org.dexpace.sdk.core.util.Clock { - override fun now(): java.time.Instant = java.time.Instant.EPOCH + private fun zeroDelayClock(): Clock = + object : Clock { + override fun now(): Instant = Instant.EPOCH override fun monotonic(): Long = 0L From 5d89790318d1ca2e3ee897ffa5c236edbcb6217f Mon Sep 17 00:00:00 2001 From: OmarAlJarrah Date: Tue, 16 Jun 2026 23:25:24 +0300 Subject: [PATCH 4/4] docs(retry): note that the backoff view must track any future HttpRetryOptions multiplier/jitter knobs --- .../dexpace/sdk/core/http/pipeline/steps/DefaultRetryStep.kt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sdk-core/src/main/kotlin/org/dexpace/sdk/core/http/pipeline/steps/DefaultRetryStep.kt b/sdk-core/src/main/kotlin/org/dexpace/sdk/core/http/pipeline/steps/DefaultRetryStep.kt index 69fa1c17..8df0f34a 100644 --- a/sdk-core/src/main/kotlin/org/dexpace/sdk/core/http/pipeline/steps/DefaultRetryStep.kt +++ b/sdk-core/src/main/kotlin/org/dexpace/sdk/core/http/pipeline/steps/DefaultRetryStep.kt @@ -156,6 +156,9 @@ public open class DefaultRetryStep * - `initialDelay` / `maxDelay` come from the options. * - `delayMultiplier` (2.0) and `jitter` (0.2) are the canonical shared constants — the * options object does not expose its own multiplier/jitter, so the SDK defaults apply. + * If [HttpRetryOptions] ever gains configurable multiplier/jitter knobs, this view must + * read them from the options instead of the constants, or the new knobs are silently + * ignored on this stack. * - `totalTimeout = ZERO` disables the deadline cap: the stage-based step has no budget. * The `fixedDelay` path never consults this view; it short-circuits in [backoffOrFixed]. *