From aaf5b2e04565508b1c9307a4f0c12048c723064b Mon Sep 17 00:00:00 2001 From: Huohua Dev Date: Mon, 9 Feb 2026 13:41:31 +0800 Subject: [PATCH 1/9] feat: real-time flush output to prevent data loss on SIGKILL - Call os.Stdout.Sync() after each URL write in WriteURLs and WriteURLsJSON - Ensure data is immediately flushed to disk in pipe/redirect scenarios - Add atomic URL counter parameter for exit summary tracking --- pkg/output/output.go | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/pkg/output/output.go b/pkg/output/output.go index 6b69475..8d15916 100644 --- a/pkg/output/output.go +++ b/pkg/output/output.go @@ -3,8 +3,10 @@ package output import ( "io" "net/url" + "os" "path" "strings" + "sync/atomic" mapset "github.com/deckarep/golang-set/v2" jsoniter "github.com/json-iterator/go" @@ -15,7 +17,7 @@ type JSONResult struct { Url string `json:"url"` } -func WriteURLs(writer io.Writer, results <-chan string, blacklistMap mapset.Set[string], RemoveParameters bool) error { +func WriteURLs(writer io.Writer, results <-chan string, blacklistMap mapset.Set[string], RemoveParameters bool, urlCount *int64) error { lastURL := mapset.NewThreadUnsafeSet[string]() for result := range results { buf := bytebufferpool.Get() @@ -38,12 +40,17 @@ func WriteURLs(writer io.Writer, results <-chan string, blacklistMap mapset.Set[ if err != nil { return err } + atomic.AddInt64(urlCount, 1) + // Real-time flush: sync stdout after each write to prevent data loss + if writer == os.Stdout { + os.Stdout.Sync() + } bytebufferpool.Put(buf) } return nil } -func WriteURLsJSON(writer io.Writer, results <-chan string, blacklistMap mapset.Set[string], RemoveParameters bool) { +func WriteURLsJSON(writer io.Writer, results <-chan string, blacklistMap mapset.Set[string], RemoveParameters bool, urlCount *int64) { var jr JSONResult enc := jsoniter.NewEncoder(writer) for result := range results { @@ -59,5 +66,10 @@ func WriteURLsJSON(writer io.Writer, results <-chan 
string, blacklistMap mapset. // todo: handle this error continue } + atomic.AddInt64(urlCount, 1) + // Real-time flush: sync stdout after each write to prevent data loss + if writer == os.Stdout { + os.Stdout.Sync() + } } } From 372e4953627ef5a4a8507c2c07c43783e7cfcc39 Mon Sep 17 00:00:00 2001 From: Huohua Dev Date: Mon, 9 Feb 2026 13:41:37 +0800 Subject: [PATCH 2/9] feat: add retry with exponential backoff and StatusCodeError - Add StatusCodeError type to carry HTTP status codes through error chain - Implement exponential backoff retry for network errors (capped at 30s) - Skip retry for 429 rate-limit and 400 bad-request responses - Add shouldRetry() to detect retryable network errors - Replace manual case-insensitive search with strings.ToLower --- pkg/httpclient/client.go | 90 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 87 insertions(+), 3 deletions(-) diff --git a/pkg/httpclient/client.go b/pkg/httpclient/client.go index cbae139..33e02df 100644 --- a/pkg/httpclient/client.go +++ b/pkg/httpclient/client.go @@ -2,7 +2,10 @@ package httpclient import ( "errors" + "fmt" + "math" "math/rand" + "strings" "time" "github.com/valyala/fasthttp" @@ -12,8 +15,23 @@ var ( ErrNilResponse = errors.New("unexpected nil response") ErrNon200Response = errors.New("API responded with non-200 status code") ErrBadRequest = errors.New("API responded with 400 status code") + ErrRateLimited = errors.New("API rate limited") ) +// StatusCodeError is an error type that carries an HTTP status code; it unwraps to the ErrNon200Response sentinel so errors.Is(err, ErrNon200Response) matches it +type StatusCodeError struct { + Code int + Msg string +} + +func (e *StatusCodeError) Error() string { + return fmt.Sprintf("%s (status code: %d)", e.Msg, e.Code) +} + +func (e *StatusCodeError) Unwrap() error { + return ErrNon200Response +} + type Header struct { Key string Value string @@ -39,6 +57,28 @@ func MakeRequest(c *fasthttp.Client, url string, maxRetries uint, timeout uint, req.Header.Set("Accept", "*/*") req.SetRequestURI(url) respBody, err = doReq(c, req, timeout) + + // 
Check if we should retry based on error type + if err != nil { + // Exponential backoff: 1s, 2s, 4s, 8s, 16s... with cap at 30s; the exponent is clamped so the Duration multiplication cannot overflow + backoffDuration := time.Duration(math.Pow(2, math.Min(float64(retries-i), 5))) * time.Second + if backoffDuration > 30*time.Second { + backoffDuration = 30 * time.Second + } + if i > 0 && shouldRetry(err) { + time.Sleep(backoffDuration) + continue + } + } + + // Rate limit (429) and bad request (400) are never retried: return the original *StatusCodeError so callers can read the code via errors.As + if err != nil { + statusCode := getStatusCodeFromError(err) + if statusCode == 429 || statusCode == 400 { + return nil, err + } + } + if err == nil { break } @@ -49,6 +89,48 @@ func MakeRequest(c *fasthttp.Client, url string, maxRetries uint, timeout uint, return respBody, nil } +// shouldRetry determines if an error should trigger a retry +func shouldRetry(err error) bool { + if err == nil { + return false + } + // Network errors that should trigger retry + errMsg := err.Error() + retryableErrors := []string{ + "connection refused", + "connection reset", + "connection timed out", + "no such host", + "timeout", + "server closed connection", + "network is unreachable", + "i/o timeout", + } + for _, pattern := range retryableErrors { + if containsIgnoreCase(errMsg, pattern) { + return true + } + } + return false +} + +// containsIgnoreCase checks if s contains substr (case-insensitive) +func containsIgnoreCase(s, substr string) bool { + return strings.Contains(strings.ToLower(s), strings.ToLower(substr)) +} + +// getStatusCodeFromError attempts to extract status code from error +func getStatusCodeFromError(err error) int { + if err == nil { + return 0 + } + var statusErr *StatusCodeError + if errors.As(err, &statusErr) { + return statusErr.Code + } + return 0 +} + // doReq handles http requests func doReq(c *fasthttp.Client, req *fasthttp.Request, timeout uint) ([]byte, error) { resp := fasthttp.AcquireResponse() @@ -58,10 +140,12 @@ func doReq(c *fasthttp.Client, req *fasthttp.Request, timeout uint) ([]byte, err return 
nil, err } if resp.StatusCode() != 200 { - if resp.StatusCode() == 400 { - return nil, ErrBadRequest + errMsg := fmt.Sprintf("API responded with status code %d", resp.StatusCode()) + // Return wrapped error with status code for proper handling + return nil, &StatusCodeError{ + Code: resp.StatusCode(), + Msg: errMsg, } - return nil, ErrNon200Response } if resp.Body() == nil { return nil, ErrNilResponse From 10d4568dda648f42a001ab3577ae270d9bf2fe51 Mon Sep 17 00:00:00 2001 From: Huohua Dev Date: Mon, 9 Feb 2026 13:41:42 +0800 Subject: [PATCH 3/9] feat: concurrent pagination for wayback provider - Implement dispatcher+worker pattern for parallel page fetching - Use sync.Once to safely stop dispatcher on empty results - Add structured logging with provider/domain/page fields - Use StatusCodeError for proper 400 status handling - Support configurable provider-threads parameter --- pkg/providers/wayback/wayback.go | 128 ++++++++++++++++++++++++------- 1 file changed, 100 insertions(+), 28 deletions(-) diff --git a/pkg/providers/wayback/wayback.go b/pkg/providers/wayback/wayback.go index 156adcd..6c0be9e 100644 --- a/pkg/providers/wayback/wayback.go +++ b/pkg/providers/wayback/wayback.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "sync" jsoniter "github.com/json-iterator/go" "github.com/lc/gau/v2/pkg/httpclient" @@ -38,39 +39,110 @@ type waybackResult [][]string // Fetch fetches all urls for a given domain and sends them to a channel. // It returns an error should one occur. 
func (c *Client) Fetch(ctx context.Context, domain string, results chan string) error { - for page := uint(0); ; page++ { - select { - case <-ctx.Done(): - return nil - default: - logrus.WithFields(logrus.Fields{"provider": Name, "page": page}).Infof("fetching %s", domain) - apiURL := c.formatURL(domain, page) - // make HTTP request - resp, err := httpclient.MakeRequest(c.config.Client, apiURL, c.config.MaxRetries, c.config.Timeout) - if err != nil { - if errors.Is(err, httpclient.ErrBadRequest) { - return nil - } - return fmt.Errorf("failed to fetch wayback results page %d: %s", page, err) - } - var result waybackResult - if err = jsoniter.Unmarshal(resp, &result); err != nil { - return fmt.Errorf("failed to decode wayback results for page %d: %s", page, err) - } + // Use provider threads for concurrent pagination, default to 3 if not set + numThreads := c.config.ProviderThreads + if numThreads == 0 { + numThreads = 3 + } - // check if there's results, wayback's pagination response - // is not always correct when using a filter - if len(result) == 0 { - break - } + // channel to collect errors from goroutines + var fetchErr error + var errMu sync.Mutex + var stopOnce sync.Once + stopCh := make(chan struct{}) - // output results - // Slicing as [1:] to skip first result by default - for _, entry := range result[1:] { - results <- entry[0] + // pageChan is a buffered channel for page dispatching + pageChan := make(chan uint, numThreads) + + // Page dispatcher: sequentially increments pages, stops when receiving stop signal + go func() { + defer close(pageChan) + for page := uint(0); ; page++ { + select { + case <-ctx.Done(): + return + case <-stopCh: + return + case pageChan <- page: } } + }() + + // Workers: fetch pages from pageChan, notify dispatcher to stop on empty results + var wg sync.WaitGroup + for i := uint(0); i < numThreads; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for page := range pageChan { + select { + case <-ctx.Done(): + return + 
default: + } + logrus.WithFields(logrus.Fields{"provider": Name, "page": page}).Infof("fetching %s", domain) + apiURL := c.formatURL(domain, page) + // make HTTP request + resp, err := httpclient.MakeRequest(c.config.Client, apiURL, c.config.MaxRetries, c.config.Timeout) + if err != nil { + var statusErr *httpclient.StatusCodeError + if errors.As(err, &statusErr) && statusErr.Code == 400 { + logrus.WithFields(logrus.Fields{ + "provider": Name, + "domain": domain, + "page": page, + "status": 400, + }).Info("Wayback: no more pages") + stopOnce.Do(func() { close(stopCh) }) + return + } + logrus.WithFields(logrus.Fields{ + "provider": Name, + "domain": domain, + "page": page, + "error": err.Error(), + }).Warn("failed to fetch wayback") + errMu.Lock() + if fetchErr == nil { + fetchErr = fmt.Errorf("failed to fetch wayback results page %d: %s", page, err) + } + errMu.Unlock() + continue + } + var result waybackResult + if err = jsoniter.Unmarshal(resp, &result); err != nil { + errMu.Lock() + if fetchErr == nil { + fetchErr = fmt.Errorf("failed to decode wayback results for page %d: %s", page, err) + } + errMu.Unlock() + continue + } + + // check if there's results, wayback's pagination response + // is not always correct when using a filter + if len(result) == 0 { + // Notify dispatcher to stop when empty result is encountered + stopOnce.Do(func() { close(stopCh) }) + return + } + + // output results + // Slicing as [1:] to skip first result by default + for _, entry := range result[1:] { + select { + case <-ctx.Done(): + return + case results <- entry[0]: + } + } + } + }() } + + wg.Wait() + + return fetchErr } // formatUrl returns a formatted URL for the Wayback API From c38863f3b170506b28763a1b2fb1e0a111c64564 Mon Sep 17 00:00:00 2001 From: Huohua Dev Date: Mon, 9 Feb 2026 13:41:46 +0800 Subject: [PATCH 4/9] feat: concurrent pagination and structured error handling for OTX - Implement dispatcher+worker pattern for parallel page fetching - Use errors.As with 
StatusCodeError for proper 429 detection - Stop pagination when has_next is false - Add structured logging with provider/domain/page/status fields --- pkg/providers/otx/otx.go | 119 ++++++++++++++++++++++++++++++++------- 1 file changed, 99 insertions(+), 20 deletions(-) diff --git a/pkg/providers/otx/otx.go b/pkg/providers/otx/otx.go index ae7839c..5c26b64 100644 --- a/pkg/providers/otx/otx.go +++ b/pkg/providers/otx/otx.go @@ -2,7 +2,9 @@ package otx import ( "context" + "errors" "fmt" + "sync" "github.com/bobesa/go-domain-util/domainutil" jsoniter "github.com/json-iterator/go" @@ -47,31 +49,108 @@ func (c *Client) Name() string { } func (c *Client) Fetch(ctx context.Context, domain string, results chan string) error { - for page := uint(1); ; page++ { - select { - case <-ctx.Done(): - return nil - default: - logrus.WithFields(logrus.Fields{"provider": Name, "page": page - 1}).Infof("fetching %s", domain) - apiURL := c.formatURL(domain, page) - resp, err := httpclient.MakeRequest(c.config.Client, apiURL, c.config.MaxRetries, c.config.Timeout) - if err != nil { - return fmt.Errorf("failed to fetch alienvault(%d): %s", page, err) - } - var result otxResult - if err := jsoniter.Unmarshal(resp, &result); err != nil { - return fmt.Errorf("failed to decode otx results for page %d: %s", page, err) - } + numThreads := c.config.ProviderThreads + if numThreads == 0 { + numThreads = 3 + } - for _, entry := range result.URLList { - results <- entry.URL - } + pageChan := make(chan uint, numThreads) + var wg sync.WaitGroup + var fetchErr error + var errMu sync.Mutex + var stopOnce sync.Once + stopCh := make(chan struct{}) - if !result.HasNext { - return nil + // Page dispatcher: sequentially increments pages, stops when receiving stop signal + go func() { + defer close(pageChan) + for page := uint(1); ; page++ { + select { + case <-ctx.Done(): + return + case <-stopCh: + return + case pageChan <- page: } } + }() + + // Workers: fetch pages from pageChan + for i := uint(0); i < 
numThreads; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for p := range pageChan { + select { + case <-ctx.Done(): + return + default: + } + logrus.WithFields(logrus.Fields{"provider": Name, "page": p - 1}).Infof("fetching %s", domain) + apiURL := c.formatURL(domain, p) + resp, err := httpclient.MakeRequest(c.config.Client, apiURL, c.config.MaxRetries, c.config.Timeout) + if err != nil { + var statusErr *httpclient.StatusCodeError + if errors.As(err, &statusErr) { + logrus.WithFields(logrus.Fields{ + "provider": Name, + "domain": domain, + "page": p - 1, + "status": statusErr.Code, + "error": statusErr.Error(), + }).Warn("OTX HTTP error") + if statusErr.Code == 429 { + errMu.Lock() + if fetchErr == nil { + fetchErr = fmt.Errorf("OTX rate limited at page %d", p) + } + errMu.Unlock() + stopOnce.Do(func() { close(stopCh) }) + return + } + } else { + logrus.WithFields(logrus.Fields{ + "provider": Name, + "domain": domain, + "page": p - 1, + "error": err.Error(), + }).Warn("failed to fetch OTX") + errMu.Lock() + if fetchErr == nil { + fetchErr = fmt.Errorf("failed to fetch alienvault(%d): %s", p, err) + } + errMu.Unlock() + } + continue + } + var result otxResult + if err := jsoniter.Unmarshal(resp, &result); err != nil { + errMu.Lock() + if fetchErr == nil { + fetchErr = fmt.Errorf("failed to decode otx results for page %d: %s", p, err) + } + errMu.Unlock() + continue + } + + for _, entry := range result.URLList { + select { + case <-ctx.Done(): + return + case results <- entry.URL: + } + } + + if !result.HasNext { + stopOnce.Do(func() { close(stopCh) }) + return + } + } + }() } + + wg.Wait() + return fetchErr } func (c *Client) formatURL(domain string, page uint) string { From d92bd9efc2535959ceb6188780685fc098a06ac9 Mon Sep 17 00:00:00 2001 From: Huohua Dev Date: Mon, 9 Feb 2026 13:41:50 +0800 Subject: [PATCH 5/9] feat: concurrent pagination and structured error handling for CommonCrawl - Implement dispatcher+worker pattern using known page count - Cap worker 
threads to actual page count - Use errors.As with StatusCodeError for proper error classification - Add structured logging for connection errors and API errors --- pkg/providers/commoncrawl/commoncrawl.go | 126 +++++++++++++++++++---- 1 file changed, 105 insertions(+), 21 deletions(-) diff --git a/pkg/providers/commoncrawl/commoncrawl.go b/pkg/providers/commoncrawl/commoncrawl.go index 791ac16..bc3572a 100644 --- a/pkg/providers/commoncrawl/commoncrawl.go +++ b/pkg/providers/commoncrawl/commoncrawl.go @@ -6,6 +6,7 @@ import ( "context" "errors" "fmt" + "sync" jsoniter "github.com/json-iterator/go" "github.com/lc/gau/v2/pkg/httpclient" @@ -56,6 +57,11 @@ func (c *Client) Name() string { func (c *Client) Fetch(ctx context.Context, domain string, results chan string) error { p, err := c.getPagination(domain) if err != nil { + logrus.WithFields(logrus.Fields{ + "provider": Name, + "domain": domain, + "error": err.Error(), + }).Warn("failed to get pagination for commoncrawl") return err } // 0 pages means no results @@ -64,33 +70,111 @@ func (c *Client) Fetch(ctx context.Context, domain string, results chan string) return nil } - for page := uint(0); page < p.Pages; page++ { - select { - case <-ctx.Done(): - return nil - default: - logrus.WithFields(logrus.Fields{"provider": Name, "page": page}).Infof("fetching %s", domain) - apiURL := c.formatURL(domain, page) - resp, err := httpclient.MakeRequest(c.config.Client, apiURL, c.config.MaxRetries, c.config.Timeout) - if err != nil { - return fmt.Errorf("failed to fetch commoncrawl(%d): %s", page, err) - } + numThreads := c.config.ProviderThreads + if numThreads == 0 { + numThreads = 3 + } - sc := bufio.NewScanner(bytes.NewReader(resp)) - for sc.Scan() { - var res apiResponse - if err := jsoniter.Unmarshal(sc.Bytes(), &res); err != nil { - return fmt.Errorf("failed to decode commoncrawl result: %s", err) + // Cap threads to actual page count + if numThreads > p.Pages { + numThreads = p.Pages + } + + pageChan := make(chan 
uint, numThreads) + var wg sync.WaitGroup + var fetchErr error + var errMu sync.Mutex + + // Page dispatcher: send page numbers + go func() { + defer close(pageChan) + for page := uint(0); page < p.Pages; page++ { + select { + case <-ctx.Done(): + return + case pageChan <- page: + } + } + }() + + // Workers: fetch pages from pageChan + for i := uint(0); i < numThreads; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for page := range pageChan { + select { + case <-ctx.Done(): + return + default: } - if res.Error != "" { - return fmt.Errorf("received an error from commoncrawl: %s", res.Error) + logrus.WithFields(logrus.Fields{"provider": Name, "page": page}).Infof("fetching %s", domain) + apiURL := c.formatURL(domain, page) + resp, err := httpclient.MakeRequest(c.config.Client, apiURL, c.config.MaxRetries, c.config.Timeout) + if err != nil { + var statusErr *httpclient.StatusCodeError + if errors.As(err, &statusErr) { + logrus.WithFields(logrus.Fields{ + "provider": Name, + "domain": domain, + "page": page, + "status": statusErr.Code, + "error": statusErr.Error(), + }).Warn("CommonCrawl HTTP error") + } else { + logrus.WithFields(logrus.Fields{ + "provider": Name, + "domain": domain, + "page": page, + "error": err.Error(), + }).Warn("failed to fetch commoncrawl") + } + errMu.Lock() + if fetchErr == nil { + fetchErr = fmt.Errorf("failed to fetch commoncrawl(%d): %s", page, err) + } + errMu.Unlock() + continue } - results <- res.URL + sc := bufio.NewScanner(bytes.NewReader(resp)) + for sc.Scan() { + var res apiResponse + if err := jsoniter.Unmarshal(sc.Bytes(), &res); err != nil { + errMu.Lock() + if fetchErr == nil { + fetchErr = fmt.Errorf("failed to decode commoncrawl result: %s", err) + } + errMu.Unlock() + continue + } + if res.Error != "" { + logrus.WithFields(logrus.Fields{ + "provider": Name, + "domain": domain, + "page": page, + "response": res.Error, + }).Warn("CommonCrawl API error") + errMu.Lock() + if fetchErr == nil { + fetchErr = 
fmt.Errorf("received an error from commoncrawl: %s", res.Error) + } + errMu.Unlock() + continue + } + + select { + case <-ctx.Done(): + return + case results <- res.URL: + } + } } - } + }() } - return nil + + wg.Wait() + return fetchErr } func (c *Client) formatURL(domain string, page uint) string { From 307ea825f485e90a08e134978198c38e41c7b5cc Mon Sep 17 00:00:00 2001 From: Huohua Dev Date: Mon, 9 Feb 2026 13:41:55 +0800 Subject: [PATCH 6/9] feat: add structured error logging for URLScan provider - Add provider/domain/page/error fields to warning logs - Add response body to rate-limit log for debugging --- pkg/providers/urlscan/urlscan.go | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/pkg/providers/urlscan/urlscan.go b/pkg/providers/urlscan/urlscan.go index a0126dc..acda475 100644 --- a/pkg/providers/urlscan/urlscan.go +++ b/pkg/providers/urlscan/urlscan.go @@ -50,6 +50,12 @@ func (c *Client) Fetch(ctx context.Context, domain string, results chan string) apiURL := c.formatURL(domain, searchAfter) resp, err := httpclient.MakeRequest(c.config.Client, apiURL, c.config.MaxRetries, c.config.Timeout, header) if err != nil { + logrus.WithFields(logrus.Fields{ + "provider": Name, + "domain": domain, + "page": page, + "error": err.Error(), + }).Warn("failed to fetch urlscan") return fmt.Errorf("failed to fetch urlscan: %s", err) } var result apiResponse @@ -60,7 +66,12 @@ func (c *Client) Fetch(ctx context.Context, domain string, results chan string) } // rate limited if result.Status == 429 { - logrus.WithField("provider", "urlscan").Warnf("urlscan responded with 429, probably being rate limited") + logrus.WithFields(logrus.Fields{ + "provider": Name, + "domain": domain, + "status": 429, + "response": string(resp), + }).Warn("urlscan rate limited") return nil } From e3736a684747a412fbdb67dbb88bb4ba3e33e10a Mon Sep 17 00:00:00 2001 From: Huohua Dev Date: Mon, 9 Feb 2026 13:42:00 +0800 Subject: [PATCH 7/9] feat: add --provider-threads flag 
for per-provider concurrent pagination - Add ProviderThreads field to providers.Config - Register --provider-threads CLI flag with default value 3 - Support provider-threads in .gau.toml config file --- pkg/providers/providers.go | 1 + runner/flags/flags.go | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/pkg/providers/providers.go b/pkg/providers/providers.go index 8ac774a..371fffa 100644 --- a/pkg/providers/providers.go +++ b/pkg/providers/providers.go @@ -22,6 +22,7 @@ type URLScan struct { type Config struct { Threads uint + ProviderThreads uint Timeout uint MaxRetries uint IncludeSubdomains bool diff --git a/runner/flags/flags.go b/runner/flags/flags.go index 4448314..7bf1ae7 100644 --- a/runner/flags/flags.go +++ b/runner/flags/flags.go @@ -29,6 +29,7 @@ type Config struct { Filters providers.Filters `mapstructure:"filters"` Proxy string `mapstructure:"proxy"` Threads uint `mapstructure:"threads"` + ProviderThreads uint `mapstructure:"provider-threads"` Timeout uint `mapstructure:"timeout"` Verbose bool `mapstructure:"verbose"` MaxRetries uint `mapstructure:"retries"` @@ -62,6 +63,7 @@ func (c *Config) ProviderConfig() (*providers.Config, error) { pc := &providers.Config{ Threads: c.Threads, + ProviderThreads: c.ProviderThreads, Timeout: c.Timeout, MaxRetries: c.MaxRetries, IncludeSubdomains: c.IncludeSubdomains, @@ -101,6 +103,7 @@ func New() *Options { pflag.String("o", "", "filename to write results to") pflag.String("config", "", "location of config file (default $HOME/.gau.toml or %USERPROFILE%\\.gau.toml)") pflag.Uint("threads", 1, "number of workers to spawn") + pflag.Uint("provider-threads", 3, "number of threads per provider for concurrent pagination") pflag.Uint("timeout", 45, "timeout (in seconds) for HTTP client") pflag.Uint("retries", 0, "retries for HTTP client") pflag.String("proxy", "", "http proxy to use") @@ -177,6 +180,7 @@ func (o *Options) DefaultConfig() *Config { Proxy: "", Timeout: 45, Threads: 1, + ProviderThreads: 3, 
Verbose: false, MaxRetries: 5, IncludeSubdomains: false, @@ -201,6 +205,7 @@ func (o *Options) getFlagValues(c *Config) { outfile := o.viper.GetString("o") fetchers := o.viper.GetStringSlice("providers") threads := o.viper.GetUint("threads") + providerThreads := o.viper.GetUint("provider-threads") blacklist := o.viper.GetStringSlice("blacklist") subs := o.viper.GetBool("subs") fp := o.viper.GetBool("fp") @@ -222,6 +227,11 @@ func (o *Options) getFlagValues(c *Config) { c.Threads = threads } + // set if --provider-threads flag is set, otherwise use default + if providerThreads > 0 { + c.ProviderThreads = providerThreads + } + // set if --blacklist flag is specified, otherwise use default if len(blacklist) > 0 { c.Blacklist = blacklist From 7527901fa835f72508944ba206b4252675c30bb6 Mon Sep 17 00:00:00 2001 From: Huohua Dev Date: Mon, 9 Feb 2026 13:42:04 +0800 Subject: [PATCH 8/9] feat: add per-provider timeout control - Create timeout context for each provider work item - Cap provider timeout at 5 minutes to prevent single provider blocking - Add structured logging with provider/domain/timeout fields --- runner/runner.go | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/runner/runner.go b/runner/runner.go index 28133ae..5cde1ad 100644 --- a/runner/runner.go +++ b/runner/runner.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "sync" + "time" "github.com/lc/gau/v2/pkg/providers" "github.com/lc/gau/v2/pkg/providers/commoncrawl" @@ -18,12 +19,14 @@ type Runner struct { Providers []providers.Provider threads uint + timeout uint ctx context.Context } // Init initializes the runner func (r *Runner) Init(c *providers.Config, providers []string, filters providers.Filters) error { r.threads = c.Threads + r.timeout = c.Timeout for _, name := range providers { switch name { case "urlscan": @@ -78,9 +81,29 @@ func (r *Runner) worker(ctx context.Context, workChan chan Work, results chan st if !ok { return } - if err := work.Do(ctx, results); 
err != nil { - logrus.WithField("provider", work.provider.Name()).Warnf("%s - %v", work.domain, err) + + // Create a timeout context for this provider using global timeout + providerTimeout := time.Duration(r.timeout) * time.Second + if providerTimeout == 0 { + providerTimeout = 45 * time.Second + } + // Cap provider timeout at 5 minutes to prevent single provider blocking + maxProviderTimeout := 5 * time.Minute + if providerTimeout > maxProviderTimeout { + providerTimeout = maxProviderTimeout + } + + providerCtx, cancel := context.WithTimeout(ctx, providerTimeout) + + if err := work.Do(providerCtx, results); err != nil { + logrus.WithFields(logrus.Fields{ + "provider": work.provider.Name(), + "domain": work.domain, + "timeout": providerTimeout, + }).Warnf("provider error: %v", err) } + + cancel() } } } From 86db5c64848c706a6a17e070eadaae62e32f8701 Mon Sep 17 00:00:00 2001 From: Huohua Dev Date: Mon, 9 Feb 2026 13:42:08 +0800 Subject: [PATCH 9/9] feat: add execution summary with URL count and duration - Track total URL count using atomic counter - Log summary with total URLs and duration on exit --- cmd/gau/main.go | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/cmd/gau/main.go b/cmd/gau/main.go index 8b4e8a5..969c6d4 100644 --- a/cmd/gau/main.go +++ b/cmd/gau/main.go @@ -6,6 +6,7 @@ import ( "io" "os" "sync" + "time" "github.com/lc/gau/v2/pkg/output" "github.com/lc/gau/v2/runner" @@ -14,6 +15,8 @@ import ( ) func main() { + startTime := time.Now() + cfg, err := flags.New().ReadInConfig() if err != nil { log.Warnf("error reading config: %v", err) @@ -43,12 +46,13 @@ func main() { } var writeWg sync.WaitGroup + var urlCount int64 writeWg.Add(1) go func(out io.Writer, JSON bool) { defer writeWg.Done() if JSON { - output.WriteURLsJSON(out, results, config.Blacklist, config.RemoveParameters) - } else if err = output.WriteURLs(out, results, config.Blacklist, config.RemoveParameters); err != nil { + output.WriteURLsJSON(out, results, 
config.Blacklist, config.RemoveParameters, &urlCount) + } else if err = output.WriteURLs(out, results, config.Blacklist, config.RemoveParameters, &urlCount); err != nil { log.Fatalf("error writing results: %v\n", err) } }(out, config.JSON) @@ -85,4 +89,13 @@ func main() { // wait for writer to finish output writeWg.Wait() + + // Calculate duration + duration := time.Since(startTime) + + // Log summary + log.Infof("=== Gau Execution Summary ===") + log.Infof("Total URLs: %d", urlCount) + log.Infof("Duration: %v", duration) + log.Infof("=============================") }