diff --git a/gems/smithy-client/lib/smithy-client/plugins/retry_errors.rb b/gems/smithy-client/lib/smithy-client/plugins/retry_errors.rb
index 720d85137..01730a060 100644
--- a/gems/smithy-client/lib/smithy-client/plugins/retry_errors.rb
+++ b/gems/smithy-client/lib/smithy-client/plugins/retry_errors.rb
@@ -6,66 +6,30 @@ module Plugins
       # @api private
       class RetryErrors < Plugin
         option(
-          :retry_strategy,
+          :retry_mode,
           default: 'standard',
-          doc_default: "'standard'",
-          doc_type: 'String, Class',
+          doc_type: String,
           docstring: <<~DOCS)
-            The retry strategy to use when retrying errors. This can be one of the following:
-            * `standard` - A standardized retry strategy used by the AWS SDKs. This includes support
-              for retry quotas, which limit the number of unsuccessful retries a client can make.
-            * `adaptive` - An experimental retry strategy that includes all the functionality of the
-              `standard` strategy along with automatic client side throttling. This is a provisional
-              strategy that may change behavior in the future.
-            * Any instance of a class that implements the following methods:
-              - `acquire_initial_retry_token(token_scope)`
-              - `refresh_retry_token(retry_token, error_info)`
-              - `record_success(retry_token)`
+            Specifies which retry algorithm to use. Values are:
+
+            * `standard` - A standardized set of retry rules across the Smithy-based SDKs.
+              This includes support for retry quotas, which limit the number of
+              unsuccessful retries a client can make. This is the default
+              value if no retry mode is provided.
+
+            * `adaptive` - A retry mode that includes all the functionality of
+              `standard` mode along with automatic client side throttling.
           DOCS
 
         option(
-          :retry_max_attempts,
+          :max_attempts,
           default: 3,
           doc_type: Integer,
           docstring: <<~DOCS)
             The maximum number attempts that will be made for a single request, including
-            the initial attempt. Used in the `standard` and `adaptive` retry strategies.
-          DOCS
-
-        option(
-          :retry_max_delay,
-          default: 20,
-          docstring: <<~DOCS)
-            The maximum delay, in seconds, between retry attempts. This option is ignored
-            if a custom `retry_backoff` is provided. Used in the `standard` and `adaptive`
-            retry strategies.
-          DOCS
-
-        option(
-          :retry_base_delay,
-          default: 2,
-          docstring: <<~DOCS)
-            The base delay, in seconds, used to calculate the exponential backoff for
-            retry attempts. This option is ignored if a custom `retry_backoff` is provided.
-            Used in the `standard` and `adaptive` retry strategies.
+            the initial attempt.
           DOCS
 
-        option(
-          :retry_backoff,
-          doc_default: 'Smithy::Client::Retry::ExponentialBackoff.new',
-          rbs_type: 'Smithy::Client::Retry::ExponentialBackoff',
-          doc_type: '#call(attempts)',
-          docstring: <<~DOCS) do |config|
-            A callable object that calculates a backoff delay for a retry attempt. The callable
-            should accept a single argument, `attempts`, that represents the number of attempts
-            that have been made. Used in the `standard` and `adaptive` retry strategies.
-          DOCS
-          Retry::ExponentialBackoff.new(
-            retry_base_delay: config.retry_base_delay,
-            retry_max_delay: config.retry_max_delay
-          )
-        end
-
         option(
           :adaptive_retry_wait_to_fill,
           default: true,
@@ -76,24 +40,54 @@ class RetryErrors < Plugin
             not retry instead of sleeping.
           DOCS
 
+        option(
+          :retry_strategy,
+          doc_type: 'Object',
+          docstring: <<~DOCS)
+            The retry strategy used by the client. If not provided, a default strategy is built
+            based on `:retry_mode` — either `Standard` or `Adaptive`.
+
+            A custom strategy must respond to:
+            * `#acquire_initial_retry_token` - returns a token
+            * `#refresh_retry_token(token, error_info)` - returns a token or nil
+            * `#record_success(token)` - records a successful request
+            * `#request_bookkeeping(error_info = nil)` - updates internal state
+          DOCS
+
+        REQUIRED_STRATEGY_METHODS = %i[
+          acquire_initial_retry_token
+          refresh_retry_token
+          record_success
+          request_bookkeeping
+        ].freeze
+
         def after_initialize(client)
           config = client.config
-          config.retry_strategy =
-            case config.retry_strategy
-            when 'standard'
-              Retry::Standard.new(
-                max_attempts: config.retry_max_attempts,
-                backoff: config.retry_backoff
-              )
-            when 'adaptive'
-              Retry::Adaptive.new(
-                max_attempts: config.retry_max_attempts,
-                backoff: config.retry_backoff,
-                wait_to_fill: config.adaptive_retry_wait_to_fill
-              )
-            else
-              config.retry_strategy
-            end
+          if config.retry_strategy
+            validate_strategy(config.retry_strategy)
+          else
+            config.retry_strategy = build_strategy(config)
+          end
+        end
+
+        private
+
+        def validate_strategy(strategy)
+          missing = REQUIRED_STRATEGY_METHODS.reject { |m| strategy.respond_to?(m) }
+          return if missing.empty?
+
+          raise ArgumentError, "Custom retry_strategy must respond to: #{missing.join(', ')}"
+        end
+
+        def build_strategy(config)
+          case config.retry_mode
+          when 'standard'
+            Retry::Standard.new(max_attempts: config.max_attempts)
+          when 'adaptive'
+            Retry::Adaptive.new(max_attempts: config.max_attempts, wait_to_fill: config.adaptive_retry_wait_to_fill)
+          else
+            raise ArgumentError, "Must provide either 'standard' or 'adaptive' for retry_mode"
+          end
         end
 
         # @api private
@@ -108,19 +102,35 @@ def call(context)
 
           def handle(context, retry_strategy, token)
             response = track_feature(retry_strategy) { @handler.call(context) }
-            if (error = response.error)
-              return response unless retryable?(context.http_request)
-
-              error_info = Http::ErrorInspector.new(error, context.http_response)
-              token = retry_strategy.refresh_retry_token(token, error_info)
-              return response unless token
+            error_info = Http::ErrorInspector.new(response.error, context.http_response) if response.error
+            retry_strategy.request_bookkeeping(error_info)
 
-              Kernel.sleep(token.retry_delay)
-            else
+            unless response.error
               retry_strategy.record_success(token)
               return response
             end
+            return response unless retryable?(context.http_request)
 
+            token = handle_error(context, retry_strategy, token, error_info)
+            return response unless token
+
+            retry_request(context, response, retry_strategy, token)
+          end
+
+          def handle_error(context, retry_strategy, token, error_info)
+            token = retry_strategy.refresh_retry_token(token, error_info)
+            return unless token
+
+            if token.no_retry_reason == :quota_exhausted
+              Kernel.sleep(token.retry_delay) if long_polling_operation?(context)
+              return
+            end
+
+            Kernel.sleep(token.retry_delay)
+            token
+          end
+
+          def retry_request(context, response, retry_strategy, token)
             reset_request(context)
             reset_response(context, response)
             context.retries += 1
@@ -141,6 +151,11 @@ def reset_response(context, response)
             response.error = nil
           end
 
+          # TODO: Revisit after trait is finalized.
+          def long_polling_operation?(context)
+            context.operation.traits.key?('smithy.api#longPoll')
+          end
+
           def track_feature(retry_strategy, &block)
             case retry_strategy
             when Retry::Standard then Features.track('RETRY_MODE_STANDARD', &block)
diff --git a/gems/smithy-client/lib/smithy-client/plugins/stub_responses.rb b/gems/smithy-client/lib/smithy-client/plugins/stub_responses.rb
index 092138613..9bd98b4eb 100644
--- a/gems/smithy-client/lib/smithy-client/plugins/stub_responses.rb
+++ b/gems/smithy-client/lib/smithy-client/plugins/stub_responses.rb
@@ -31,6 +31,20 @@ def add_handlers(handlers, config)
         def after_initialize(client)
           return unless client.config.stub_responses
 
+          remove_common_handlers(client)
+          remove_context_handlers(client)
+        end
+
+        private
+
+        def remove_common_handlers(client)
+          # Handlers removed when stubbing regardless of context.
+          # Subclasses should not override this method.
+        end
+
+        def remove_context_handlers(client)
+          # Context-specific handler removals. Override in subclasses
+          # to remove domain-specific handlers (e.g., domain-specific retry handler).
           client.handlers.remove(RetryErrors::Handler)
         end
 
diff --git a/gems/smithy-client/lib/smithy-client/retry/adaptive.rb b/gems/smithy-client/lib/smithy-client/retry/adaptive.rb
index 98ecfad3f..8f33480a9 100644
--- a/gems/smithy-client/lib/smithy-client/retry/adaptive.rb
+++ b/gems/smithy-client/lib/smithy-client/retry/adaptive.rb
@@ -5,8 +5,6 @@ module Client
     module Retry
       # Adaptive retry strategy for retrying requests.
       class Adaptive
-        # @option [#call] :backoff (ExponentialBackoff.new) A callable object that
-        #  calculates a backoff delay for a retry attempt.
         # @option [Integer] :max_attempts (3) The maximum number of attempts that
         #  will be made for a single request, including the initial attempt.
         # @option [Boolean] :wait_to_fill When true, the request will sleep until
@@ -15,60 +13,62 @@ class Adaptive
         #  not retry instead of sleeping.
         def initialize(options = {})
           super()
-          @backoff = options[:backoff] || ExponentialBackoff.new(
-            base_delay: options[:base_delay],
-            max_delay: options[:max_delay]
-          )
           @max_attempts = options[:max_attempts] || 3
-          @wait_to_fill = options[:wait_to_fill] || true
+          @wait_to_fill = options.fetch(:wait_to_fill, true)
           @client_rate_limiter = ClientRateLimiter.new
           @quota = Quota.new
-          @capacity_amount = 0
         end
 
-        # @return [#call]
-        attr_reader :backoff
-
         # @return [Integer]
         attr_reader :max_attempts
 
         # @return [Boolean]
         attr_reader :wait_to_fill
 
+        # Updates internal state based on the response outcome.
+        # @param [Http::ErrorInspector, nil] error_info The error info, or nil on success.
+        def request_bookkeeping(error_info = nil)
+          is_throttle = error_info&.error_type == 'Throttling'
+          @client_rate_limiter.update_sending_rate(is_throttle)
+        end
+
         def acquire_initial_retry_token(_token_scope = nil)
           @client_rate_limiter.token_bucket_acquire(1, wait_to_fill: @wait_to_fill)
           Token.new
         end
 
-        def refresh_retry_token(retry_token, error_info)
+        def refresh_retry_token(retry_token, error_info) # rubocop:disable Metrics/AbcSize
           return unless error_info.retryable?
 
-          @client_rate_limiter.update_sending_rate(
-            error_info.error_type == 'Throttling'
-          )
           return if retry_token.retry_count >= @max_attempts - 1
 
-          @capacity_amount = @quota.checkout_capacity(error_info)
-          return unless @capacity_amount.positive?
+          @client_rate_limiter.token_bucket_acquire(1, wait_to_fill: @wait_to_fill)
+
+          capacity_amount = @quota.checkout_capacity(error_info)
+          delay = backoff.call(retry_token.retry_count, error_info)
+          retry_token.capacity_amount = capacity_amount
+
+          if capacity_amount.zero?
+            retry_token.retry_delay = delay
+            retry_token.no_retry_reason = :quota_exhausted
+            return retry_token
+          end
 
-          delay = compute_delay(error_info, retry_token.retry_count)
           retry_token.retry_count += 1
           retry_token.retry_delay = delay
+          retry_token.no_retry_reason = nil
           retry_token
         end
 
         def record_success(retry_token)
-          @client_rate_limiter.update_sending_rate(false)
-          @quota.release(@capacity_amount)
+          @quota.release(retry_token.capacity_amount)
           retry_token
         end
 
         private
 
-        def compute_delay(error_info, retry_count)
-          return @backoff.call(retry_count) unless error_info.hints[:retry_after]
-
-          [error_info.hints[:retry_after], @backoff.max_delay].min
+        def backoff
+          @backoff ||= ExponentialBackoff.new
         end
       end
     end
diff --git a/gems/smithy-client/lib/smithy-client/retry/exponential_backoff.rb b/gems/smithy-client/lib/smithy-client/retry/exponential_backoff.rb
index 76624b3c5..047b809b2 100644
--- a/gems/smithy-client/lib/smithy-client/retry/exponential_backoff.rb
+++ b/gems/smithy-client/lib/smithy-client/retry/exponential_backoff.rb
@@ -3,25 +3,36 @@
 module Smithy
   module Client
     module Retry
-      # Default exponential backoff retry strategy for retrying requests.
+      # @api private
+      # Default exponential backoff for retrying requests.
       class ExponentialBackoff
-        def initialize(options = {})
-          @base_delay = options[:base_delay] || 2
-          @max_delay = options[:max_delay] || 20
-        end
-
-        # @return [Numeric]
-        attr_reader :base_delay
-
-        # @return [Numeric]
-        attr_reader :max_delay
+        MAX_BACKOFF = 20
+        EXPONENTIAL_BASE = 2
 
         # Calculates a delay based on exponential backoff strategy. Uses full jitter approach.
         # @param [Integer] attempts
+        # @param [Smithy::Client::Http::ErrorInspector] error_info
         # @return [Numeric] delay in seconds
-        def call(attempts)
-          delay = (@base_delay**attempts)
-          [delay, @max_delay].min * Kernel.rand
+        def call(attempts, error_info)
+          # From SEP: t_i = b * min(x * r^i, MAX_BACKOFF)
+          calculated_delay = backoff_scalar_x(error_info) * (EXPONENTIAL_BASE**attempts)
+          t_i = Kernel.rand * [calculated_delay, MAX_BACKOFF].min
+          apply_retry_after(t_i, error_info)
+        end
+
+        private
+
+        def apply_retry_after(t_i, error_info)
+          retry_after = error_info.hints[:retry_after]
+          return t_i unless retry_after
+
+          # Clamp retry delay to t_i < delay < t_i + 5 per SEP.
+          delay = [t_i, retry_after].max
+          [delay, t_i + 5].min
+        end
+
+        def backoff_scalar_x(error_info)
+          error_info.error_type == 'Throttling' ? 1 : 0.05
         end
       end
     end
diff --git a/gems/smithy-client/lib/smithy-client/retry/quota.rb b/gems/smithy-client/lib/smithy-client/retry/quota.rb
index 9466a4200..c5286e7f1 100644
--- a/gems/smithy-client/lib/smithy-client/retry/quota.rb
+++ b/gems/smithy-client/lib/smithy-client/retry/quota.rb
@@ -4,12 +4,12 @@ module Smithy
   module Client
     module Retry
       # @api private
-      # Used in 'standard' and 'adaptive' retry modes.
+      # Used in :standard and :adaptive retry modes.
       class Quota
         INITIAL_RETRY_TOKENS = 500
-        RETRY_COST = 5
+        RETRY_COST = 14
         NO_RETRY_INCREMENT = 1
-        TIMEOUT_RETRY_COST = 10
+        THROTTLING_RETRY_COST = 5
 
         def initialize
           @mutex = Mutex.new
@@ -23,8 +23,8 @@ def initialize
         def checkout_capacity(error_info)
           @mutex.synchronize do
             capacity_amount =
-              if error_info.error_type == 'Transient'
-                TIMEOUT_RETRY_COST
+              if error_info.error_type == 'Throttling'
+                THROTTLING_RETRY_COST
               else
                 RETRY_COST
               end
diff --git a/gems/smithy-client/lib/smithy-client/retry/standard.rb b/gems/smithy-client/lib/smithy-client/retry/standard.rb
index a3bc5b113..47391cd71 100644
--- a/gems/smithy-client/lib/smithy-client/retry/standard.rb
+++ b/gems/smithy-client/lib/smithy-client/retry/standard.rb
@@ -5,27 +5,22 @@ module Client
     module Retry
       # Standard retry strategy for retrying requests.
       class Standard
-        # @option [#call] :backoff (ExponentialBackoff.new) A callable object that
-        #  calculates a backoff delay for a retry attempt.
         # @option [Integer] :max_attempts (3) The maximum number of attempts that
         #  will be made for a single request, including the initial attempt.
         def initialize(options = {})
           super()
-          @backoff = options[:backoff] || ExponentialBackoff.new(
-            base_delay: options[:base_delay],
-            max_delay: options[:max_delay]
-          )
           @max_attempts = options[:max_attempts] || 3
           @quota = Quota.new
-          @capacity_amount = 0
         end
 
-        # @return [#call]
-        attr_reader :backoff
-
         # @return [Integer]
         attr_reader :max_attempts
 
+        # Updates internal state based on the response outcome.
+        # @param [Http::ErrorInspector, nil] error_info The error info, or nil on success.
+        # No-op for Standard retry strategy.
+        def request_bookkeeping(error_info = nil); end
+
         def acquire_initial_retry_token(_token_scope = nil)
           Token.new
         end
@@ -35,26 +30,31 @@ def refresh_retry_token(retry_token, error_info)
 
           return if retry_token.retry_count >= @max_attempts - 1
 
-          @capacity_amount = @quota.checkout_capacity(error_info)
-          return unless @capacity_amount.positive?
+          capacity_amount = @quota.checkout_capacity(error_info)
+          delay = backoff.call(retry_token.retry_count, error_info)
+          retry_token.capacity_amount = capacity_amount
+
+          if capacity_amount.zero?
+            retry_token.retry_delay = delay
+            retry_token.no_retry_reason = :quota_exhausted
+            return retry_token
+          end
 
-          delay = compute_delay(error_info, retry_token.retry_count)
           retry_token.retry_count += 1
           retry_token.retry_delay = delay
+          retry_token.no_retry_reason = nil
           retry_token
         end
 
         def record_success(retry_token)
-          @quota.release(@capacity_amount)
+          @quota.release(retry_token.capacity_amount)
           retry_token
         end
 
         private
 
-        def compute_delay(error_info, retry_count)
-          return @backoff.call(retry_count) unless error_info.hints[:retry_after]
-
-          [error_info.hints[:retry_after], @backoff.max_delay].min
+        def backoff
+          @backoff ||= ExponentialBackoff.new
         end
       end
     end
diff --git a/gems/smithy-client/lib/smithy-client/retry/token.rb b/gems/smithy-client/lib/smithy-client/retry/token.rb
index 93eb38469..93380b0c7 100644
--- a/gems/smithy-client/lib/smithy-client/retry/token.rb
+++ b/gems/smithy-client/lib/smithy-client/retry/token.rb
@@ -8,6 +8,8 @@ class Token
         def initialize
           @retry_count = 0
           @retry_delay = 0
+          @capacity_amount = nil
+          @no_retry_reason = nil
         end
 
         # The number of times the operation has been retried.
@@ -17,6 +19,14 @@ def initialize
         # The delay before the next retry.
         # @return [Numeric]
         attr_accessor :retry_delay
+
+        # The quota capacity token has taken.
+        # @return [Integer]
+        attr_accessor :capacity_amount
+
+        # The reason for no-retry.
+        # @return [Symbol, nil] :quota_exhausted, or nil
+        attr_accessor :no_retry_reason
       end
     end
   end
diff --git a/gems/smithy-client/spec/smithy-client/plugins/retry_errors_spec.rb b/gems/smithy-client/spec/smithy-client/plugins/retry_errors_spec.rb
index b61cdb6eb..3f22318e9 100644
--- a/gems/smithy-client/spec/smithy-client/plugins/retry_errors_spec.rb
+++ b/gems/smithy-client/spec/smithy-client/plugins/retry_errors_spec.rb
@@ -15,61 +15,47 @@ module Plugins
           expect(client.config).to respond_to(:retry_strategy)
         end
 
-        it 'adds a :retry_max_attempts option to config' do
-          expect(client.config).to respond_to(:retry_max_attempts)
+        it 'adds a :retry_mode option to config' do
+          expect(client.config).to respond_to(:retry_mode)
         end
 
-        it 'adds a :retry_backoff option to config' do
-          expect(client.config).to respond_to(:retry_backoff)
+        it 'adds a :max_attempts option to config' do
+          expect(client.config).to respond_to(:max_attempts)
         end
 
         it 'adds an :adaptive_retry_wait_to_fill option to config' do
           expect(client.config).to respond_to(:adaptive_retry_wait_to_fill)
         end
 
-        it 'creates a Standard retry strategy from a string' do
-          client = client_class.new(retry_strategy: 'standard')
+        it 'creates a Standard retry strategy from retry_mode' do
+          client = client_class.new(retry_mode: 'standard', stub_responses: true)
           expect(client.config.retry_strategy).to be_a(Retry::Standard)
         end
 
-        it 'creates an Adaptive retry strategy from a string' do
-          client = client_class.new(retry_strategy: 'adaptive')
+        it 'creates an Adaptive retry strategy from retry_mode' do
+          client = client_class.new(retry_mode: 'adaptive', stub_responses: true)
           expect(client.config.retry_strategy).to be_a(Retry::Adaptive)
         end
 
-        it 'uses a custom retry strategy' do
-          retry_strategy = double('CustomRetryStrategy')
-          client = client_class.new(retry_strategy: retry_strategy)
-          expect(client.config.retry_strategy).to be(retry_strategy)
-        end
-
-        it 'passes flat options to the standard retry strategy class' do
+        it 'passes max_attempts to the standard retry strategy' do
           client = client_class.new(
-            retry_strategy: 'standard',
-            retry_max_attempts: 5,
-            retry_backoff: 2
+            retry_mode: 'standard',
+            max_attempts: 5,
+            stub_responses: true
           )
-          expect(client.config.retry_max_attempts).to eq(5)
-          expect(client.config.retry_backoff).to eq(2)
-          retry_strategy = client.config.retry_strategy
-          expect(retry_strategy.max_attempts).to eq(5)
-          expect(retry_strategy.backoff).to eq(2)
+          expect(client.config.max_attempts).to eq(5)
+          expect(client.config.retry_strategy.max_attempts).to eq(5)
         end
 
-        it 'passes flat options to the adaptive retry strategy class' do
+        it 'passes options to the adaptive retry strategy' do
           client = client_class.new(
-            retry_strategy: 'adaptive',
-            retry_max_attempts: 5,
-            retry_backoff: 2,
-            adaptive_retry_wait_to_fill: 5
+            retry_mode: 'adaptive',
+            max_attempts: 5,
+            adaptive_retry_wait_to_fill: false,
+            stub_responses: true
           )
-          expect(client.config.retry_max_attempts).to eq(5)
-          expect(client.config.retry_backoff).to eq(2)
-          expect(client.config.adaptive_retry_wait_to_fill).to eq(5)
-          retry_strategy = client.config.retry_strategy
-          expect(retry_strategy.max_attempts).to eq(5)
-          expect(retry_strategy.backoff).to eq(2)
-          expect(retry_strategy.wait_to_fill).to eq(5)
+          expect(client.config.max_attempts).to eq(5)
+          expect(client.config.retry_strategy.max_attempts).to eq(5)
         end
 
         it 'adds the handler by default' do
@@ -89,7 +75,8 @@ module Plugins
             config.build!
           end
 
-          let(:context) { HandlerContext.new(config: config) }
+          let(:operation) { double('operation', traits: {}) }
+          let(:context) { HandlerContext.new(config: config, operation: operation) }
           let(:response) { Response.new(context: context) }
           let(:service_error) { ServiceError.new(context, Schema::EmptyStructure.new) }
 
@@ -109,15 +96,15 @@ module Plugins
               test_case_def = [
                 {
                   response: { status_code: 500, error: service_error },
-                  expect: { available_capacity: 495, retries: 1, delay: 1 }
+                  expect: { available_capacity: 486, retries: 1, delay: 0.05 }
                 },
                 {
                   response: { status_code: 500, error: service_error },
-                  expect: { available_capacity: 490, retries: 2, delay: 2 }
+                  expect: { available_capacity: 472, retries: 2, delay: 0.1 }
                 },
                 {
                   response: { status_code: 200, error: nil },
-                  expect: { available_capacity: 495, retries: 2 }
+                  expect: { available_capacity: 486, retries: 2 }
                 } # success
               ]
 
@@ -128,15 +115,15 @@ module Plugins
               test_case_def = [
                 {
                   response: { status_code: 500, error: service_error },
-                  expect: { available_capacity: 495, retries: 1, delay: 1 }
+                  expect: { available_capacity: 486, retries: 1, delay: 0.05 }
                 },
                 {
                   response: { status_code: 500, error: service_error },
-                  expect: { available_capacity: 490, retries: 2, delay: 2 }
+                  expect: { available_capacity: 472, retries: 2, delay: 0.1 }
                 },
                 {
                   response: { status_code: 500, error: service_error },
-                  expect: { available_capacity: 490, retries: 2 }
+                  expect: { available_capacity: 472, retries: 2 }
                 } # failure
               ]
 
@@ -144,12 +131,12 @@ module Plugins
             end
 
             it 'fails due to retry quota reached after a single retry' do
-              quota.instance_variable_set(:@available_capacity, 5)
+              quota.instance_variable_set(:@available_capacity, 14)
 
               test_case_def = [
                 {
                   response: { status_code: 500, error: service_error },
-                  expect: { available_capacity: 0, retries: 1, delay: 1 }
+                  expect: { available_capacity: 0, retries: 1, delay: 0.05 }
                 },
                 {
                   response: { status_code: 500, error: service_error },
@@ -179,23 +166,23 @@ module Plugins
               test_case_def = [
                 {
                   response: { status_code: 500, error: service_error },
-                  expect: { available_capacity: 495, retries: 1, delay: 1 }
+                  expect: { available_capacity: 486, retries: 1, delay: 0.05 }
                 },
                 {
                   response: { status_code: 500, error: service_error },
-                  expect: { available_capacity: 490, retries: 2, delay: 2 }
+                  expect: { available_capacity: 472, retries: 2, delay: 0.1 }
                 },
                 {
                   response: { status_code: 500, error: service_error },
-                  expect: { available_capacity: 485, retries: 3, delay: 4 }
+                  expect: { available_capacity: 458, retries: 3, delay: 0.2 }
                 },
                 {
                   response: { status_code: 500, error: service_error },
-                  expect: { available_capacity: 480, retries: 4, delay: 8 }
+                  expect: { available_capacity: 444, retries: 4, delay: 0.4 }
                 },
                 {
                   response: { status_code: 500, error: service_error },
-                  expect: { available_capacity: 480, retries: 4 }
+                  expect: { available_capacity: 444, retries: 4 }
                 }
               ]
 
@@ -203,45 +190,60 @@ module Plugins
             end
 
             it 'does not exceed the max backoff time' do
-              config.retry_strategy = Retry::Standard.new(max_delay: 3)
               retry_strategy.instance_variable_set(:@max_attempts, 5)
+              stub_const('Smithy::Client::Retry::ExponentialBackoff::MAX_BACKOFF', 0.2)
 
               test_case_def = [
                 {
                   response: { status_code: 500, error: service_error },
-                  expect: { available_capacity: 495, retries: 1, delay: 1 }
+                  expect: { available_capacity: 486, retries: 1, delay: 0.05 }
                 },
                 {
                   response: { status_code: 500, error: service_error },
-                  expect: { available_capacity: 490, retries: 2, delay: 2 }
+                  expect: { available_capacity: 472, retries: 2, delay: 0.1 }
                 },
                 {
                   response: { status_code: 500, error: service_error },
-                  expect: { available_capacity: 485, retries: 3, delay: 3 }
+                  expect: { available_capacity: 458, retries: 3, delay: 0.2 }
                 },
                 {
                   response: { status_code: 500, error: service_error },
-                  expect: { available_capacity: 480, retries: 4, delay: 3 }
+                  expect: { available_capacity: 444, retries: 4, delay: 0.2 }
                 },
                 {
                   response: { status_code: 500, error: service_error },
-                  expect: { available_capacity: 480, retries: 4 }
+                  expect: { available_capacity: 444, retries: 4 }
                 }
               ]
 
               handle_with_retry(test_case_def)
             end
 
-            it 'clamps retry_after hint to max_delay' do
-              config.retry_strategy = Retry::Standard.new(max_delay: 3)
+            it 'clamps retry_after hint' do
               allow(Kernel).to receive(:rand).and_return(1)
 
               response.context.http_response.headers['retry-after'] = '9999'
 
               test_case_def = [
                 {
+                  # retry_after=9999s, t_i=0.05, clamped to t_i+5=5.05
                   response: { status_code: 503, error: service_error },
-                  expect: { available_capacity: 495, retries: 1, delay: 3 }
+                  expect: { available_capacity: 486, retries: 1, delay: 5.05 }
+                },
+                {
+                  response: { status_code: 200, error: nil },
+                  expect: { available_capacity: 500, retries: 1 }
+                }
+              ]
+
+              handle_with_retry(test_case_def)
+            end
+
+            it 'throttling error costs 5 tokens and uses 1s base backoff' do
+              test_case_def = [
+                {
+                  response: { status_code: 429, error: service_error },
+                  expect: { available_capacity: 495, retries: 1, delay: 1.0 }
                 },
                 {
                   response: { status_code: 200, error: nil },
@@ -253,21 +255,17 @@ module Plugins
             end
 
             it 'fails due to retry quota bucket exhaustion' do
-              config.retry_max_attempts = 5
-              quota.instance_variable_set(:@available_capacity, 10)
+              config.max_attempts = 5
+              quota.instance_variable_set(:@available_capacity, 20)
 
               test_case_def = [
                 {
                   response: { status_code: 500, error: service_error },
-                  expect: { available_capacity: 5, retries: 1, delay: 1 }
+                  expect: { available_capacity: 6, retries: 1, delay: 0.05 }
                 },
                 {
                   response: { status_code: 502, error: service_error },
-                  expect: { available_capacity: 0, retries: 2, delay: 2 }
-                },
-                {
-                  response: { status_code: 503, error: service_error },
-                  expect: { available_capacity: 0, retries: 2 }
+                  expect: { available_capacity: 6, retries: 1 }
                 }
               ]
 
@@ -275,21 +273,21 @@ module Plugins
             end
 
             it 'recovers after successful responses' do
-              config.retry_max_attempts = 5
-              quota.instance_variable_set(:@available_capacity, 15)
+              config.max_attempts = 5
+              quota.instance_variable_set(:@available_capacity, 30)
 
               test_case_def = [
                 {
                   response: { status_code: 500, error: service_error },
-                  expect: { available_capacity: 10, retries: 1, delay: 1 }
+                  expect: { available_capacity: 16, retries: 1, delay: 0.05 }
                 },
                 {
                   response: { status_code: 502, error: service_error },
-                  expect: { available_capacity: 5, retries: 2, delay: 2 }
+                  expect: { available_capacity: 2, retries: 2, delay: 0.1 }
                 },
                 {
                   response: { status_code: 200, error: nil },
-                  expect: { available_capacity: 10, retries: 2 }
+                  expect: { available_capacity: 16, retries: 2 }
                 }
               ]
               handle_with_retry(test_case_def)
@@ -297,16 +295,123 @@ module Plugins
               test_case_post_success = [
                 {
                   response: { status_code: 500, error: service_error },
-                  expect: { available_capacity: 5, retries: 1, delay: 1 }
+                  expect: { available_capacity: 2, retries: 1, delay: 0.05 }
                 },
                 {
                   response: { status_code: 200, error: nil },
-                  expect: { available_capacity: 10, retries: 1 }
+                  expect: { available_capacity: 16, retries: 1 }
                 }
               ]
               reset_request
               handle_with_retry(test_case_post_success)
             end
+
+            it 'shares retry quota across requests' do
+              test_case_def = [
+                {
+                  response: { status_code: 500, error: service_error },
+                  expect: { available_capacity: 486, retries: 1, delay: 0.05 }
+                },
+                {
+                  response: { status_code: 500, error: service_error },
+                  expect: { available_capacity: 472, retries: 2, delay: 0.1 }
+                },
+                {
+                  response: { status_code: 200, error: nil },
+                  expect: { available_capacity: 486, retries: 2 }
+                }
+              ]
+              handle_with_retry(test_case_def)
+
+              # Second request shares the same quota (486 remaining)
+              test_case_def2 = [
+                {
+                  response: { status_code: 500, error: service_error },
+                  expect: { available_capacity: 472, retries: 1, delay: 0.05 }
+                },
+                {
+                  response: { status_code: 200, error: nil },
+                  expect: { available_capacity: 486, retries: 1 }
+                }
+              ]
+              reset_request
+              handle_with_retry(test_case_def2)
+            end
+          end
+
+          context 'long-polling operations' do
+            before(:each) do
+              config.retry_strategy = Retry::Standard.new
+              allow(Kernel).to receive(:rand).and_return(1)
+              allow(operation).to receive(:traits)
+                .and_return({ 'smithy.api#longPoll' => {} })
+            end
+
+            it 'backs off after transient error when token bucket empty' do
+              quota.instance_variable_set(:@available_capacity, 0)
+
+              test_case_def = [
+                {
+                  response: { status_code: 500, error: service_error },
+                  expect: { available_capacity: 0, retries: 0, delay: 0.05 }
+                }
+              ]
+              handle_with_retry(test_case_def)
+            end
+
+            it 'backs off after throttling error when token bucket empty' do
+              quota.instance_variable_set(:@available_capacity, 0)
+
+              test_case_def = [
+                {
+                  response: { status_code: 429, error: service_error },
+                  expect: { available_capacity: 0, retries: 0, delay: 1.0 }
+                }
+              ]
+              handle_with_retry(test_case_def)
+            end
+
+            it 'does not delay when max attempts exceeded' do
+              test_case_def = [
+                {
+                  response: { status_code: 500, error: service_error },
+                  expect: { retries: 1, delay: 0.05 }
+                },
+                {
+                  response: { status_code: 500, error: service_error },
+                  expect: { retries: 2, delay: 0.1 }
+                },
+                {
+                  response: { status_code: 500, error: service_error },
+                  expect: { retries: 2 }
+                }
+              ]
+              handle_with_retry(test_case_def)
+            end
+
+            it 'does not delay on success' do
+              test_case_def = [
+                {
+                  response: { status_code: 500, error: service_error },
+                  expect: { retries: 1, delay: 0.05 }
+                },
+                {
+                  response: { status_code: 200, error: nil },
+                  expect: { retries: 1 }
+                }
+              ]
+              handle_with_retry(test_case_def)
+            end
+
+            it 'does not delay on non-retryable errors' do
+              test_case_def = [
+                {
+                  response: { status_code: 404, error: service_error },
+                  expect: { retries: 0 }
+                }
+              ]
+              handle_with_retry(test_case_def)
+            end
           end
 
           context 'adaptive mode' do
@@ -318,16 +423,17 @@ module Plugins
               client_rate_limiter.instance_variable_set(:@last_max_rate, 10)
             end
 
-            it 'clamps retry_after hint to max_delay' do
-              config.retry_strategy = Retry::Adaptive.new(max_delay: 3)
+            it 'clamps retry_after hint' do
+              config.retry_strategy = Retry::Adaptive.new
               allow(Kernel).to receive(:rand).and_return(1)
 
               response.context.http_response.headers['retry-after'] = '9999'
 
               test_case_def = [
                 {
+                  # retry_after=9999s, t_i=0.05, clamped to t_i+5=5.05
                   response: { status_code: 503, error: service_error },
-                  expect: { retries: 1, delay: 3 }
+                  expect: { retries: 1, delay: 5.05 }
                 },
                 {
                   response: { status_code: 200, error: nil },
diff --git a/gems/smithy-client/spec/smithy-client/retry/quota_spec.rb b/gems/smithy-client/spec/smithy-client/retry/quota_spec.rb
index 09aaadee8..08724ae19 100644
--- a/gems/smithy-client/spec/smithy-client/retry/quota_spec.rb
+++ b/gems/smithy-client/spec/smithy-client/retry/quota_spec.rb
@@ -1,5 +1,7 @@
 # frozen_string_literal: true
 
+require_relative '../../spec_helper'
+
 module Smithy
   module Client
     module Retry
@@ -18,11 +20,11 @@ module Retry
           end
 
           it 'checks out the timeout cost when the error is a networking error' do
-            error = double('ErrorInspector', error_type: 'Transient')
+            error = double('ErrorInspector', error_type: 'Throttling')
 
             checked_out_capacity = subject.checkout_capacity(error)
             expect(checked_out_capacity)
-              .to eq(Retry::Quota::TIMEOUT_RETRY_COST)
+              .to eq(Retry::Quota::THROTTLING_RETRY_COST)
           end
 
           it 'returns 0 when there is insufficient capacity' do