From 97f80de1317ed7cdf3b098c574a57e3c365655f6 Mon Sep 17 00:00:00 2001 From: Jonathan Irwin Date: Thu, 4 Jun 2026 13:14:28 -0400 Subject: [PATCH] fix: auto-retry zip upload on transient network failures Retries up to 3 times with 2s exponential backoff on connection errors or 5xx responses. Seeks the file back to start and resets the progress counter before each attempt. 4xx responses are non-retryable. Co-Authored-By: Claude Sonnet 4.6 (1M context) --- internal/api/client.go | 81 ++++++++++++++++++++-------------- internal/ui/commands/deploy.go | 78 +++++++++++++++++++++----------- 2 files changed, 101 insertions(+), 58 deletions(-) diff --git a/internal/api/client.go b/internal/api/client.go index 1c9a1dd..987108b 100644 --- a/internal/api/client.go +++ b/internal/api/client.go @@ -478,43 +478,60 @@ func (c *client) UploadZip(ctx context.Context, uploadURL string, zipPath string slog.Info("Uploading zip file", "size", fileInfo.Size(), "path", zipPath) - // Create PUT request with context - req, err := http.NewRequestWithContext(ctx, "PUT", uploadURL, file) - if err != nil { - slog.Error("Failed to create upload request", "error", err) - return fmt.Errorf("failed to create upload request: %w", err) - } + const maxAttempts = 3 + const retryDelay = 2 * time.Second - req.Header.Set("Content-Type", "application/zip") - req.ContentLength = fileInfo.Size() + err = retry.Do( + func() error { + // Seek to start so each attempt reads the full file + if _, seekErr := file.Seek(0, io.SeekStart); seekErr != nil { + return retry.Unrecoverable(fmt.Errorf("failed to seek zip file: %w", seekErr)) + } - // Execute request - startTime := time.Now() - resp, err := c.httpClient.Do(req) - duration := time.Since(startTime) + // Create PUT request with context + req, err := http.NewRequestWithContext(ctx, "PUT", uploadURL, file) + if err != nil { + return retry.Unrecoverable(fmt.Errorf("failed to create upload request: %w", err)) + } + req.Header.Set("Content-Type", "application/zip") + req.ContentLength = fileInfo.Size() - if err != nil { - slog.Error("Zip upload request failed", "error", err, "duration", duration) - return fmt.Errorf("upload failed: %w", err) - } - defer resp.Body.Close() //nolint:errcheck // Deferred close, error not actionable + // Execute request + startTime := time.Now() + resp, err := c.httpClient.Do(req) + duration := time.Since(startTime) - if resp.StatusCode != 200 { - body, err := io.ReadAll(resp.Body) - if err != nil { - slog.Error("Failed to read error response", "error", err, "statusCode", resp.StatusCode) - return fmt.Errorf("failed to read error response body: %w", err) - } - slog.Error("Zip upload failed", - "statusCode", resp.StatusCode, - "response", string(body), - "duration", duration, - ) - return fmt.Errorf("upload failed with status %d: %s", resp.StatusCode, string(body)) - } + if err != nil { + slog.Error("Zip upload request failed", "error", err, "duration", duration) + return fmt.Errorf("upload failed: %w", err) + } + defer resp.Body.Close() //nolint:errcheck // Deferred close, error not actionable + + if resp.StatusCode != 200 { + body, _ := io.ReadAll(resp.Body) + slog.Error("Zip upload failed", "statusCode", resp.StatusCode, "response", string(body), "duration", duration) + // Don't retry on client errors (4xx) + if resp.StatusCode >= 400 && resp.StatusCode < 500 { + return retry.Unrecoverable(fmt.Errorf("upload failed with status %d: %s", resp.StatusCode, string(body))) + } + return fmt.Errorf("upload failed with status %d: %s", resp.StatusCode, string(body)) + } - slog.Info("Zip upload successful", "size", fileInfo.Size(), "duration", duration) - return nil + slog.Info("Zip upload successful", "size", fileInfo.Size(), "duration", duration) + return nil + }, + retry.Context(ctx), + retry.Attempts(maxAttempts), + retry.Delay(retryDelay), + retry.DelayType(retry.BackOffDelay), + retry.RetryIf(func(err error) bool { + return !errors.Is(err, context.Canceled) && !errors.Is(err, context.DeadlineExceeded) + }), + retry.OnRetry(func(n uint, err error) { + slog.Warn("Retrying zip upload", "attempt", n+1, "maxAttempts", maxAttempts, "error", err) + }), + ) + return err } // FetchBuildLogs retrieves build logs for a specific build diff --git a/internal/ui/commands/deploy.go b/internal/ui/commands/deploy.go index 0d41f58..8718ef2 100644 --- a/internal/ui/commands/deploy.go +++ b/internal/ui/commands/deploy.go @@ -2,6 +2,7 @@ package commands import ( "context" + "errors" "fmt" "io" "log/slog" @@ -13,6 +14,7 @@ import ( "sync/atomic" "time" + "github.com/avast/retry-go/v4" "github.com/bugsnag/bugsnag-go/v2" "github.com/charmbracelet/bubbles/progress" "github.com/charmbracelet/bubbles/viewport" @@ -1094,37 +1096,61 @@ func (m *DeployView) uploadZipWithProgress() error { return fmt.Errorf("failed to stat zip file: %w", err) } - // Create a reader that tracks progress - progressReader := &progressReader{ - reader: file, - counter: m.atomicBytesUploaded, - } + // Use a longer timeout for uploads (30 minutes for large files) + uploadClient := &http.Client{Timeout: 30 * time.Minute} - // Create PUT request with context - req, err := http.NewRequestWithContext(m.ctx, "PUT", m.appResponse.UploadURL, progressReader) - if err != nil { - return fmt.Errorf("failed to create upload request: %w", err) - } + const maxAttempts = 3 + const retryDelay = 2 * time.Second - req.Header.Set("Content-Type", "application/zip") - req.ContentLength = fileInfo.Size() + return retry.Do( + func() error { + // Reset file position and progress counter for each attempt + if _, seekErr := file.Seek(0, io.SeekStart); seekErr != nil { + return retry.Unrecoverable(fmt.Errorf("failed to seek zip file: %w", seekErr)) + } + m.atomicBytesUploaded.Store(0) - // Execute request using a simple HTTP client - client := &http.Client{ - Timeout: 30 * time.Minute, // Allow up to 30 minutes for large uploads - } - resp, err := client.Do(req) - if err != nil { - return fmt.Errorf("upload failed: %w", err) - } - defer resp.Body.Close() //nolint:errcheck // Deferred close, error not actionable + // Create a reader that tracks progress + progressReader := &progressReader{ + reader: file, + counter: m.atomicBytesUploaded, + } - if resp.StatusCode != 200 { - body, _ := io.ReadAll(resp.Body) - return fmt.Errorf("upload failed with status %d: %s", resp.StatusCode, string(body)) - } + // Create PUT request with context + req, err := http.NewRequestWithContext(m.ctx, "PUT", m.appResponse.UploadURL, progressReader) + if err != nil { + return retry.Unrecoverable(fmt.Errorf("failed to create upload request: %w", err)) + } + req.Header.Set("Content-Type", "application/zip") + req.ContentLength = fileInfo.Size() + + resp, err := uploadClient.Do(req) + if err != nil { + return fmt.Errorf("upload failed: %w", err) + } + defer resp.Body.Close() //nolint:errcheck // Deferred close, error not actionable - return nil + if resp.StatusCode != 200 { + body, _ := io.ReadAll(resp.Body) + if resp.StatusCode >= 400 && resp.StatusCode < 500 { + return retry.Unrecoverable(fmt.Errorf("upload failed with status %d: %s", resp.StatusCode, string(body))) + } + return fmt.Errorf("upload failed with status %d: %s", resp.StatusCode, string(body)) + } + + return nil + }, + retry.Context(m.ctx), + retry.Attempts(maxAttempts), + retry.Delay(retryDelay), + retry.DelayType(retry.BackOffDelay), + retry.RetryIf(func(err error) bool { + return !errors.Is(err, context.Canceled) && !errors.Is(err, context.DeadlineExceeded) + }), + retry.OnRetry(func(n uint, err error) { + slog.Warn("Retrying zip upload", "attempt", n+1, "maxAttempts", maxAttempts, "error", err) + }), + ) } // progressReader wraps an io.Reader and tracks bytes read