From d2eade4c2da06e4ae1c11bb47676722cd2e35e33 Mon Sep 17 00:00:00 2001 From: Katie Mulliken Date: Mon, 30 Mar 2026 08:19:38 -0400 Subject: [PATCH] feat: add git sparse checkout mode for batch scanning Adds --clone flag to batch and discover commands, scanning repos via local git sparse checkout instead of the GitHub API. This avoids API rate limits when scanning large numbers of repos. Key changes: - internal/git: sparse clone package with concurrent clone-and-scan - Sliding star-count windows in FetchTopRepos to paginate beyond GitHub's 1,000-result search limit - Repo list caching in SQLite for --resume with --top N - SQLite hardening: single-conn serialization + busy_timeout for concurrent goroutine writes Co-Authored-By: Claude Opus 4.6 --- cmd/fluxgate/main.go | 217 ++++++++++++++++++++++++++++++----- internal/git/sparse.go | 160 ++++++++++++++++++++++++++ internal/git/sparse_test.go | 136 ++++++++++++++++++++++ internal/github/batch.go | 67 +++++++++-- internal/store/migrations.go | 20 ++++ internal/store/sqlite.go | 72 +++++++++++- 6 files changed, 638 insertions(+), 34 deletions(-) create mode 100644 internal/git/sparse.go create mode 100644 internal/git/sparse_test.go diff --git a/cmd/fluxgate/main.go b/cmd/fluxgate/main.go index 67de565..56f9efa 100644 --- a/cmd/fluxgate/main.go +++ b/cmd/fluxgate/main.go @@ -18,6 +18,7 @@ import ( "github.com/north-echo/fluxgate/internal/dashboard" "github.com/north-echo/fluxgate/internal/diff" "github.com/north-echo/fluxgate/internal/export" + gitclone "github.com/north-echo/fluxgate/internal/git" ghclient "github.com/north-echo/fluxgate/internal/github" gitlabclient "github.com/north-echo/fluxgate/internal/gitlab" "github.com/north-echo/fluxgate/internal/merge" @@ -218,16 +219,19 @@ Platforms: func newBatchCmd() *cobra.Command { var ( - top int - dbPath string - list string - resume bool - delay time.Duration - reportPath string - token string - tokens string - severities string - rules string + top int + dbPath string + list string + resume bool + delay time.Duration + reportPath string + token string + tokens string + severities string + rules string + useClone bool + concurrency int + keepDir string ) cmd := &cobra.Command{ @@ -265,6 +269,12 @@ func newBatchCmd() *cobra.Command { } client = ghclient.NewClient(token) } + + if useClone { + if err := gitclone.CheckGit(); err != nil { + return err + } + } ctx := context.Background() var repos []ghclient.RepoInfo @@ -274,16 +284,47 @@ func newBatchCmd() *cobra.Command { return fmt.Errorf("loading repo list: %w", err) } } else if top > 0 { - fmt.Printf("Fetching top %d repos by stars...\n", top) - repos, err = client.FetchTopRepos(ctx, top) - if err != nil { - return fmt.Errorf("fetching top repos: %w", err) + cacheKey := fmt.Sprintf("top:%d", top) + if resume { + cached, _ := db.LoadRepoList(cacheKey) + if cached != nil { + fmt.Printf("Loaded %d repos from cache\n\n", len(cached)) + for _, c := range cached { + repos = append(repos, ghclient.RepoInfo{Owner: c.Owner, Name: c.Name, Stars: c.Stars, Language: c.Language}) + } + } + } + if len(repos) == 0 { + fmt.Printf("Fetching top %d repos by stars...\n", top) + repos, err = client.FetchTopRepos(ctx, top) + if err != nil { + return fmt.Errorf("fetching top repos: %w", err) + } + fmt.Printf("Found %d repos\n\n", len(repos)) + + entries := make([]store.RepoListEntry, len(repos)) + for i, r := range repos { + entries[i] = store.RepoListEntry{Owner: r.Owner, Name: r.Name, Stars: r.Stars, Language: r.Language} + } + if saveErr := db.SaveRepoList(cacheKey, entries); saveErr != nil { + fmt.Fprintf(os.Stderr, "Warning: could not cache repo list: %v\n", saveErr) + } } - fmt.Printf("Found %d repos\n\n", len(repos)) } else { return fmt.Errorf("specify --top N or --list file") } + if useClone { + return batchScanWithClone(ctx, repos, cloneScanOptions{ + DB: db, + Token: token, + ScanOpts: parseScanOpts(severities, rules), + Resume: resume, + Concurrency: concurrency, + KeepDir: keepDir, + }) + } + batchOpts := ghclient.BatchOptions{ Top: top, List: list, @@ -316,23 +357,126 @@ func newBatchCmd() *cobra.Command { cmd.Flags().StringVar(&severities, "severity", "", "Filter by severity (comma-separated)") cmd.Flags().StringVar(&rules, "rules", "", "Filter by rule ID (comma-separated)") cmd.Flags().DurationVar(&delay, "delay", 0, "Delay between repos to avoid rate limits (e.g. 1s, 500ms)") + cmd.Flags().BoolVar(&useClone, "clone", false, "Use git sparse checkout instead of API (avoids rate limits)") + cmd.Flags().IntVar(&concurrency, "concurrency", 5, "Number of concurrent clone operations (used with --clone)") + cmd.Flags().StringVar(&keepDir, "keep", "", "Keep cloned repos in this directory instead of cleaning up") return cmd } +// cloneScanOptions groups parameters for clone-based batch scanning. +type cloneScanOptions struct { + DB *store.DB + Token string + ScanOpts scanner.ScanOptions + Resume bool + Concurrency int + KeepDir string +} + +// batchScanWithClone scans repos by sparse-cloning them locally instead of +// fetching workflows via the GitHub API. Each repo is cloned, scanned, and +// cleaned up within a goroutine, bounding disk usage to O(concurrency). +func batchScanWithClone(ctx context.Context, repos []ghclient.RepoInfo, copts cloneScanOptions) error { + var toScan []ghclient.RepoInfo + for _, repo := range repos { + if copts.Resume { + already, err := copts.DB.IsRepoScanned(repo.Owner, repo.Name) + if err != nil { + return fmt.Errorf("checking if %s/%s is scanned: %w", repo.Owner, repo.Name, err) + } + if already { + continue + } + } + toScan = append(toScan, repo) + } + + if len(toScan) == 0 { + fmt.Println("All repos already scanned.") + return nil + } + + if copts.KeepDir != "" { + fmt.Fprintf(os.Stderr, "Keeping clones in: %s\n", copts.KeepDir) + if err := os.MkdirAll(copts.KeepDir, 0o750); err != nil { + return fmt.Errorf("creating keep dir: %w", err) + } + } + + repoInfo := make(map[string]ghclient.RepoInfo, len(toScan)) + cloneRepos := make([]gitclone.Repo, len(toScan)) + for i, r := range toScan { + cloneRepos[i] = gitclone.Repo{Owner: r.Owner, Name: r.Name} + repoInfo[r.Owner+"/"+r.Name] = r + } + + fmt.Printf("Scanning %d repos via clone (concurrency: %d)...\n", len(toScan), copts.Concurrency) + + results := gitclone.CloneAndScan(ctx, cloneRepos, copts.Concurrency, copts.Token, copts.KeepDir, + func(owner, name, dir string, cr *gitclone.CloneResult) error { + key := owner + "/" + name + info := repoInfo[key] + + scanResult, err := scanner.ScanDirectory(dir, copts.ScanOpts) + if err != nil { + emptyResult := &scanner.ScanResult{Path: key} + if saveErr := copts.DB.SaveResult(owner, name, info.Stars, info.Language, emptyResult); saveErr != nil { + fmt.Fprintf(os.Stderr, " Warning: could not save error state: %v\n", saveErr) + } + return err + } + + cr.SetFindings(len(scanResult.Findings), scanResult.Workflows) + scanResult.Path = key + return copts.DB.SaveResult(owner, name, info.Stars, info.Language, scanResult) + }) + + scanned, withFindings := 0, 0 + for i, cr := range results { + key := cr.Owner + "/" + cr.Name + info := repoInfo[key] + + fmt.Printf("[%d/%d] %s", i+1, len(results), key) + if info.Stars > 0 { + fmt.Printf(" (%d stars)", info.Stars) + } + + if cr.Err != nil { + fmt.Printf(" error: %v\n", cr.Err) + continue + } + + scanned++ + if cr.Findings > 0 { + withFindings++ + fmt.Printf(" %d issues in %d workflows\n", cr.Findings, cr.Workflows) + } else { + fmt.Println(" clean") + } + } + + fmt.Printf("\nBatch complete: %d scanned, %d with findings, %d skipped\n", + scanned, withFindings, len(repos)-len(toScan)) + return nil +} + func newDiscoverCmd() *cobra.Command { var ( - trigger string - minStars int - maxPages int - dbPath string - delay time.Duration - resume bool - token string - severities string - rules string - listOnly bool - output string + trigger string + minStars int + maxPages int + dbPath string + delay time.Duration + resume bool + token string + severities string + rules string + listOnly bool + output string + useClone bool + concurrency int + keepDir string ) cmd := &cobra.Command{ @@ -343,6 +487,13 @@ func newDiscoverCmd() *cobra.Command { if token == "" { token = os.Getenv("GITHUB_TOKEN") } + + if useClone { + if err := gitclone.CheckGit(); err != nil { + return err + } + } + client := ghclient.NewClient(token) ctx := context.Background() @@ -380,6 +531,17 @@ func newDiscoverCmd() *cobra.Command { } defer db.Close() + if useClone { + return batchScanWithClone(ctx, repos, cloneScanOptions{ + DB: db, + Token: token, + ScanOpts: parseScanOpts(severities, rules), + Resume: resume, + Concurrency: concurrency, + KeepDir: keepDir, + }) + } + batchOpts := ghclient.BatchOptions{ Resume: resume, Delay: delay, @@ -402,6 +564,9 @@ func newDiscoverCmd() *cobra.Command { cmd.Flags().StringVar(&rules, "rules", "", "Filter by rule ID (comma-separated)") cmd.Flags().BoolVar(&listOnly, "list-only", false, "Output repo list without scanning") cmd.Flags().StringVarP(&output, "output", "o", "", "Output file for --list-only (default: stdout)") + cmd.Flags().BoolVar(&useClone, "clone", false, "Use git sparse checkout instead of API (avoids rate limits)") + cmd.Flags().IntVar(&concurrency, "concurrency", 5, "Number of concurrent clone operations (used with --clone)") + cmd.Flags().StringVar(&keepDir, "keep", "", "Keep cloned repos in this directory instead of cleaning up") return cmd } diff --git a/internal/git/sparse.go b/internal/git/sparse.go new file mode 100644 index 0000000..ebf0482 --- /dev/null +++ b/internal/git/sparse.go @@ -0,0 +1,160 @@ +package git + +import ( + "context" + "fmt" + "os" + "os/exec" + "path/filepath" + "sync" +) + +// CloneResult holds the outcome of a single clone-and-scan operation. +type CloneResult struct { + Owner string + Name string + Err error + Findings int // populated by ScanFunc via SetFindings + Workflows int // populated by ScanFunc via SetFindings +} + +// SetFindings records scan metrics in the result. Call this from ScanFunc. +func (r *CloneResult) SetFindings(findings, workflows int) { + r.Findings = findings + r.Workflows = workflows +} + +// CheckGit verifies that git is available in PATH. +func CheckGit() error { + _, err := exec.LookPath("git") + if err != nil { + return fmt.Errorf("git is required for --clone mode but was not found in PATH") + } + return nil +} + +// runGit executes a git command in the given directory, suppressing output. +func runGit(ctx context.Context, dir string, args ...string) error { + cmd := exec.CommandContext(ctx, "git", args...) + cmd.Dir = dir + return cmd.Run() +} + +// SparseClone clones a GitHub repository using sparse checkout, fetching only +// the .github/ directory. +// +// If token is non-empty, it is injected via GIT_ASKPASS to avoid exposing +// credentials in process argument lists. +func SparseClone(ctx context.Context, owner, repo, destDir, token string) error { + repoURL := fmt.Sprintf("https://github.com/%s/%s.git", owner, repo) + + cloneCmd := exec.CommandContext(ctx, "git", "clone", + "--filter=blob:none", + "--no-checkout", + "--depth=1", + repoURL, + destDir, + ) + if token != "" { + cloneCmd.Env = append(os.Environ(), tokenAskPassEnv(token)...) + } + if err := cloneCmd.Run(); err != nil { + return fmt.Errorf("git clone %s/%s: %w", owner, repo, err) + } + + // Non-cone mode excludes root-level files; cone mode always includes them. + if err := runGit(ctx, destDir, "sparse-checkout", "init", "--no-cone"); err != nil { + return fmt.Errorf("git sparse-checkout init for %s/%s: %w", owner, repo, err) + } + + if err := runGit(ctx, destDir, "sparse-checkout", "set", "/.github/"); err != nil { + return fmt.Errorf("git sparse-checkout set for %s/%s: %w", owner, repo, err) + } + + if err := runGit(ctx, destDir, "checkout"); err != nil { + return fmt.Errorf("git checkout for %s/%s: %w", owner, repo, err) + } + + return nil +} + +// tokenAskPassEnv returns environment variables that inject a GitHub token +// via GIT_ASKPASS, keeping the token out of process argument lists. +func tokenAskPassEnv(token string) []string { + return []string{ + "GIT_ASKPASS=printf", + "GIT_TERMINAL_PROMPT=0", + fmt.Sprintf("GIT_PASSWORD=%s", token), + } +} + +// ScanFunc is called with the cloned repo directory and a result pointer. +// It should scan the directory, persist results, and call result.SetFindings +// to record metrics. Errors are recorded in CloneResult. +type ScanFunc func(owner, name, dir string, result *CloneResult) error + +// Repo identifies a repository to clone. +type Repo struct { + Owner string + Name string +} + +// CloneAndScan clones repos concurrently, calling scanFn on each clone as it +// completes. Each clone is removed after scanning unless keepDir is non-empty. +// This bounds disk usage to O(concurrency) rather than O(total repos). +func CloneAndScan(ctx context.Context, repos []Repo, concurrency int, token, keepDir string, scanFn ScanFunc) []CloneResult { + if concurrency < 1 { + concurrency = 5 + } + + results := make([]CloneResult, len(repos)) + sem := make(chan struct{}, concurrency) + var wg sync.WaitGroup + + for i := range repos { + wg.Add(1) + go func(idx int) { + defer wg.Done() + + sem <- struct{}{} + defer func() { <-sem }() + + owner, name := repos[idx].Owner, repos[idx].Name + results[idx] = CloneResult{Owner: owner, Name: name} + + select { + case <-ctx.Done(): + results[idx].Err = ctx.Err() + return + default: + } + + var repoDir string + if keepDir != "" { + repoDir = filepath.Join(keepDir, owner, name) + if err := os.MkdirAll(filepath.Dir(repoDir), 0o750); err != nil { + results[idx].Err = err + return + } + } else { + tmpDir, err := os.MkdirTemp("", "fluxgate-*") + if err != nil { + results[idx].Err = err + return + } + repoDir = filepath.Join(tmpDir, owner, name) + defer os.RemoveAll(tmpDir) + } + + if err := SparseClone(ctx, owner, name, repoDir, token); err != nil { + results[idx].Err = err + return + } + + results[idx].Err = scanFn(owner, name, repoDir, &results[idx]) + }(i) + } + + wg.Wait() + return results +} diff --git a/internal/git/sparse_test.go b/internal/git/sparse_test.go new file mode 100644 index 0000000..ad27b6e --- /dev/null +++ b/internal/git/sparse_test.go @@ -0,0 +1,136 @@ +package git + +import ( + "context" + "os" + "os/exec" + "path/filepath" + "testing" +) + +func TestCheckGit(t *testing.T) { + if _, err := exec.LookPath("git"); err != nil { + t.Skip("git not available in test environment") + } + if err := CheckGit(); err != nil { + t.Fatalf("CheckGit() returned error: %v", err) + } +} + +func TestCheckGitMissingBinary(t *testing.T) { + t.Setenv("PATH", "") + err := CheckGit() + if err == nil { + t.Fatal("expected error when git is not in PATH") + } + want := "git is required for --clone mode but was not found in PATH" + if err.Error() != want { + t.Errorf("got %q, want %q", err.Error(), want) + } +} + +func TestSparseClone(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test in short mode") + } + if _, err := exec.LookPath("git"); err != nil { + t.Skip("git not available") + } + + ctx := context.Background() + cloneDir := filepath.Join(t.TempDir(), "clone") + + err := SparseClone(ctx, "actions", "checkout", cloneDir, "") + if err != nil { + t.Fatalf("SparseClone() error: %v", err) + } + + entries, err := os.ReadDir(cloneDir) + if err != nil { + t.Fatalf("reading clone dir: %v", err) + } + + for _, entry := range entries { + if entry.Name() == ".git" || entry.Name() == ".github" { + continue + } + t.Errorf("unexpected file in sparse checkout: %s", entry.Name()) + } + + if _, err := os.Stat(filepath.Join(cloneDir, ".github", "workflows")); err != nil { + t.Errorf(".github/workflows/ should exist: %v", err) + } +} + +func TestSparseCloneInvalidRepo(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test in short mode") + } + if _, err := exec.LookPath("git"); err != nil { + t.Skip("git not available") + } + + ctx := context.Background() + cloneDir := filepath.Join(t.TempDir(), "clone") + + err := SparseClone(ctx, "nonexistent-owner-xyzzy", "nonexistent-repo-xyzzy", cloneDir, "") + if err == nil { + t.Fatal("expected error for invalid repo, got nil") + } +} + +func TestCloneAndScan(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test in short mode") + } + if _, err := exec.LookPath("git"); err != nil { + t.Skip("git not available") + } + + ctx := context.Background() + repos := []Repo{ + {Owner: "actions", Name: "checkout"}, + {Owner: "nonexistent-owner-xyzzy", Name: "nonexistent-repo-xyzzy"}, + } + + var scannedDirs []string + results := CloneAndScan(ctx, repos, 2, "", "", + func(owner, name, dir string, cr *CloneResult) error { + scannedDirs = append(scannedDirs, dir) + cr.SetFindings(5, 2) + return nil + }) + + if len(results) != 2 { + t.Fatalf("expected 2 results, got %d", len(results)) + } + + var success, failure int + for _, r := range results { + if r.Err != nil { + failure++ + } else { + success++ + } + } + + if success != 1 { + t.Errorf("expected 1 success, got %d", success) + } + if failure != 1 { + t.Errorf("expected 1 failure, got %d", failure) + } + + if len(scannedDirs) != 1 { + t.Errorf("expected scanFn called once, got %d", len(scannedDirs)) + } + + // Verify findings were recorded + for _, r := range results { + if r.Err == nil { + if r.Findings != 5 || r.Workflows != 2 { + t.Errorf("expected findings=5, workflows=2, got %d, %d", r.Findings, r.Workflows) + } + } + } +} diff --git a/internal/github/batch.go b/internal/github/batch.go index aea66b2..a6e0c4e 100644 --- a/internal/github/batch.go +++ b/internal/github/batch.go @@ -4,6 +4,7 @@ import ( "bufio" "context" "fmt" + "math" "os" "strings" "time" @@ -32,14 +33,55 @@ type RepoInfo struct { } // FetchTopRepos returns the top N repos by star count from GitHub search API. +// GitHub limits search results to 1,000 per query, so for N > 1000 we use +// sliding star-count windows to paginate beyond the limit. func (c *Client) FetchTopRepos(ctx context.Context, top int) ([]RepoInfo, error) { var repos []RepoInfo + seen := make(map[string]bool) perPage := 100 if top < perPage { perPage = top } - for page := 1; len(repos) < top; page++ { + // Start with a broad query; narrow the star range when we hit the 1000-result ceiling + maxStars := 0 // 0 means no upper bound + minStars := 1000 + + for len(repos) < top { + query := fmt.Sprintf("stars:%d..%d", minStars, maxStars) + if maxStars == 0 { + query = fmt.Sprintf("stars:>%d", minStars) + } + + windowRepos, lowestStars, err := c.fetchSearchWindow(ctx, query, perPage, top-len(repos), seen) + if err != nil { + return repos, err + } + + repos = append(repos, windowRepos...) + fmt.Printf(" %d repos collected so far (stars >= %d)\n", len(repos), lowestStars) + + if len(windowRepos) == 0 || lowestStars <= minStars { + break + } + + // Slide the window: next query gets repos with fewer stars + maxStars = lowestStars + } + + if len(repos) > top { + repos = repos[:top] + } + return repos, nil +} + +// fetchSearchWindow fetches up to `limit` repos matching a star query, returning +// the repos found, the lowest star count seen, and any error. +func (c *Client) fetchSearchWindow(ctx context.Context, query string, perPage, limit int, seen map[string]bool) ([]RepoInfo, int, error) { + var repos []RepoInfo + lowestStars := math.MaxInt + + for page := 1; len(repos) < limit && page <= 10; page++ { searchOpts := &gh.SearchOptions{ Sort: "stars", Order: "desc", @@ -51,32 +93,43 @@ func (c *Client) FetchTopRepos(ctx context.Context, top int) ([]RepoInfo, error) result, err := withRetryRotate(ctx, c, func() retryableFunc[*gh.RepositoriesSearchResult] { return func(ctx context.Context) (*gh.RepositoriesSearchResult, *gh.Response, error) { - result, resp, err := c.gh.Search.Repositories(ctx, "stars:>1000", searchOpts) + result, resp, err := c.gh.Search.Repositories(ctx, query, searchOpts) return result, resp, err } }) if err != nil { - return repos, fmt.Errorf("searching repos (page %d): %w", page, err) + return repos, lowestStars, fmt.Errorf("searching repos (query=%s page=%d): %w", query, page, err) } for _, r := range result.Repositories { + key := r.GetOwner().GetLogin() + "/" + r.GetName() + if seen[key] { + continue + } + seen[key] = true + + stars := r.GetStargazersCount() + if stars < lowestStars { + lowestStars = stars + } + repos = append(repos, RepoInfo{ Owner: r.GetOwner().GetLogin(), Name: r.GetName(), - Stars: r.GetStargazersCount(), + Stars: stars, Language: r.GetLanguage(), }) - if len(repos) >= top { + if len(repos) >= limit { break } } if len(result.Repositories) < perPage { - break // no more results + break } } - return repos, nil + return repos, lowestStars, nil } // LoadRepoList reads repos from a file (one owner/repo per line). diff --git a/internal/store/migrations.go b/internal/store/migrations.go index 78b16d5..ce7d91e 100644 --- a/internal/store/migrations.go +++ b/internal/store/migrations.go @@ -77,3 +77,23 @@ CREATE TABLE IF NOT EXISTS patches ( CREATE INDEX IF NOT EXISTS idx_patches_disclosure ON patches(disclosure_id); ` + +const migration005AddRepoLists = ` +CREATE TABLE IF NOT EXISTS repo_lists ( + id INTEGER PRIMARY KEY, + query TEXT NOT NULL UNIQUE, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +CREATE TABLE IF NOT EXISTS repo_list_entries ( + id INTEGER PRIMARY KEY, + list_id INTEGER REFERENCES repo_lists(id), + owner TEXT NOT NULL, + name TEXT NOT NULL, + stars INTEGER, + language TEXT, + position INTEGER NOT NULL +); + +CREATE INDEX IF NOT EXISTS idx_repo_list_entries_list ON repo_list_entries(list_id, position); +` diff --git a/internal/store/sqlite.go b/internal/store/sqlite.go index 3f008f0..c1fbaaa 100644 --- a/internal/store/sqlite.go +++ b/internal/store/sqlite.go @@ -48,12 +48,20 @@ func Open(path string) (*DB, error) { return nil, fmt.Errorf("opening database: %w", err) } - // Enable WAL mode for better concurrent access + // Serialize all access through a single connection to prevent SQLITE_BUSY + // under concurrent goroutine writes. WAL mode still helps with read perf. + db.SetMaxOpenConns(1) + if _, err := db.Exec("PRAGMA journal_mode=WAL"); err != nil { db.Close() return nil, fmt.Errorf("setting WAL mode: %w", err) } + if _, err := db.Exec("PRAGMA busy_timeout=30000"); err != nil { + db.Close() + return nil, fmt.Errorf("setting busy timeout: %w", err) + } + if _, err := db.Exec(schema); err != nil { db.Close() return nil, fmt.Errorf("running migrations: %w", err) @@ -71,6 +79,68 @@ func runMigrations(db *sqlx.DB) { db.Exec(migration002Disclosures) db.Exec(migration003Patches) db.Exec(migration004NoWorkflows) + db.Exec(migration005AddRepoLists) +} + +// RepoListEntry holds a cached repo from a saved list. +type RepoListEntry struct { + Owner string `db:"owner"` + Name string `db:"name"` + Stars int `db:"stars"` + Language string `db:"language"` +} + +// SaveRepoList caches a fetched repo list under a query key (e.g. "top:5000"). +func (d *DB) SaveRepoList(query string, repos []RepoListEntry) error { + tx, err := d.db.Beginx() + if err != nil { + return err + } + defer tx.Rollback() + + if _, err := tx.Exec("DELETE FROM repo_list_entries WHERE list_id IN (SELECT id FROM repo_lists WHERE query = ?)", query); err != nil { + return err + } + if _, err := tx.Exec("DELETE FROM repo_lists WHERE query = ?", query); err != nil { + return err + } + + res, err := tx.Exec("INSERT INTO repo_lists (query) VALUES (?)", query) + if err != nil { + return err + } + listID, _ := res.LastInsertId() + + for i, r := range repos { + _, err := tx.Exec( + "INSERT INTO repo_list_entries (list_id, owner, name, stars, language, position) VALUES (?, ?, ?, ?, ?, ?)", + listID, r.Owner, r.Name, r.Stars, r.Language, i, + ) + if err != nil { + return err + } + } + + return tx.Commit() +} + +// LoadRepoList loads a cached repo list by query key. Returns nil if not found. +func (d *DB) LoadRepoList(query string) ([]RepoListEntry, error) { + var entries []RepoListEntry + err := d.db.Select(&entries, ` + SELECT e.owner, e.name, e.stars, e.language + FROM repo_list_entries e + JOIN repo_lists l ON l.id = e.list_id + WHERE l.query = ? + ORDER BY e.position + `, query) + if err != nil { + return nil, err + } + if len(entries) == 0 { + return nil, nil + } + return entries, nil } // Close closes the database connection.