diff --git a/README.md b/README.md
index ca16fb9..bb66078 100644
--- a/README.md
+++ b/README.md
@@ -89,6 +89,25 @@ Because the command is exec'd without a shell, the volume path is never string-c
> **Don't double-schedule verification.** If your external tool already runs its own verify on a timer (e.g. a cron/systemd job), don't *also* set `interval` for a verify command — two heavy passes will step on each other. Pick one driver: let squirrel schedule it (so the result lands in `squirrel hooks` / the TUI) **or** let the tool schedule it (maximum independence — verification keeps happening even when the agent is down), not both.
+### Index snapshots
+
+The catalog should be as redundant as the data it describes. After every successful sync, squirrel takes one `VACUUM INTO` snapshot of the whole index (a self-contained, `db check`-able `.db` file) to a **local tier** and — for destination (bucket/sftp/…) syncs — rides a copy **along to the destination**, under each synced volume's `.squirrel-index/`. A restore-from-cloud then yields the data *and* the index that explains it.
+
+This is **on by default, zero-config** — an absent `[backups]` table means it's enabled with the defaults below. Override or disable via:
+
+```toml
+[backups]
+enabled = true # local snapshot-on-sync (default true)
+dir = "" # local snapshot directory (default:
/backups)
+keep = 7 # local snapshots kept (rotation; 0 = keep all)
+cloud = true # ride a copy along to destination buckets (default true)
+cloud_keep = 7 # snapshots kept per //.squirrel-index/ (0 = keep all)
+```
+
+`enabled = false` disables both halves; `cloud = false` keeps the local snapshot but uploads nothing. Snapshots are named `index--run-.db` — lexically sortable and traceable to the run that produced them. A single snapshot is taken per `squirrel sync` invocation and fanned out to every target; a snapshot or upload failure is surfaced as a warning but never fails the sync.
+
+> **Privacy.** The ride-along payload is the *full global* `index.db` — paths and BLAKE3 hashes for **all** volumes (never file contents). It lands in the same bucket as your data (same trust boundary). Use a private bucket and server-side encryption.
+
## Quickstart
Index a configured volume:
@@ -173,6 +192,7 @@ Each destination is a tree shaped like the local volumes:
pictures/
2024/cat.jpg
.squirrel-history/run-7/2024/cat.jpg # prior content of cat.jpg
+ .squirrel-index/index-20260604T120000.000Z-run-12.db # global index snapshot (ride-along)
docs/
invoice.pdf
.squirrel-history/run-9/invoice.pdf
@@ -180,6 +200,8 @@ Each destination is a tree shaped like the local volumes:
`.squirrel-history/run-/` is rclone's `--backup-dir` target for that sync run. It is filtered out of all subsequent comparisons so it does not grow rclone's listing time or get uploaded back. A directory literally called `.squirrel-history` in your source volume is also filtered (with a warning), to keep the reserved name out of the destination tree by accident.
+`.squirrel-index/` holds the index snapshots ridden along after each successful sync (see [Index snapshots](#index-snapshots)). Like `.squirrel-history`, it is filtered out of all sync and restore transfers and from peer-sync, so a snapshot is never mistaken for user content.
+
## Notes
- Hash: BLAKE3-256 via `github.com/zeebo/blake3`. Stored as a 32-byte `BLOB` in the `blake3` column. The CLI accepts and prints hex.
diff --git a/cmd/squirrel/agent.go b/cmd/squirrel/agent.go
index 7675787..d4a5ff5 100644
--- a/cmd/squirrel/agent.go
+++ b/cmd/squirrel/agent.go
@@ -180,7 +180,14 @@ func buildSchedulerSyncRunner(cfg *config.Config, s *store.Store, rcl *sync.Rclo
if err != nil {
return agent.SyncRunReport{Err: err}
}
- rep, runErr := sync.RunPair(ctx, s, rcl, pair, sync.Options{})
+ // Snapshot-on-sync fires on each node's scheduled syncs too (#75):
+ // this is the operating cadence the catalog churns on. Each kick is
+ // a single pair, so a fresh Snapshotter per kick is the right unit.
+ opts := sync.Options{}
+ if cfg.Backups.Enabled {
+ opts.Snapshot = sync.NewSnapshotter(s, rcl, snapshotConfig(cfg, s.Path()))
+ }
+ rep, runErr := sync.RunPair(ctx, s, rcl, pair, opts)
return agent.SyncRunReport{RunID: rep.RunID, Status: rep.Status, Err: runErr}
}
}
diff --git a/cmd/squirrel/sync.go b/cmd/squirrel/sync.go
index a60e63d..bb5c4d1 100644
--- a/cmd/squirrel/sync.go
+++ b/cmd/squirrel/sync.go
@@ -74,6 +74,13 @@ func runSync(cmd *cobra.Command, volumeName, destinationName string, opts sync.O
if err := writeRcloneConfigLogged(out, rcl, cfg); err != nil {
return err
}
+ // One snapshotter shared across every pair: the VACUUM INTO snapshot
+ // is taken once per invocation and fanned out (decision #1). Disabled
+ // for dry-run (no run rows to snapshot against) and when [backups] is
+ // turned off.
+ if !opts.DryRun && cfg.Backups.Enabled {
+ opts.Snapshot = sync.NewSnapshotter(s, rcl, snapshotConfig(cfg, s.Path()))
+ }
var anyFailed bool
for _, p := range pairs {
@@ -89,6 +96,23 @@ func runSync(cmd *cobra.Command, volumeName, destinationName string, opts sync.O
return nil
}
+// snapshotConfig resolves the [backups] config into the sync package's
+// SnapshotConfig, filling an empty backups dir with the default sibling
+// backups/ directory next to the live DB (the same tier `db backup` and
+// the pre-migration snapshots use).
+func snapshotConfig(cfg *config.Config, dbPath string) sync.SnapshotConfig {
+ dir := cfg.Backups.Dir
+ if dir == "" {
+ dir = defaultBackupsDir(dbPath)
+ }
+ return sync.SnapshotConfig{
+ Dir: dir,
+ Keep: cfg.Backups.Keep,
+ Cloud: cfg.Backups.Cloud,
+ CloudKeep: cfg.Backups.CloudKeep,
+ }
+}
+
// shallowSyncWarning is printed at the CLI layer when a sync or restore
// runs with --shallow. It spells out the safety trade so the operator
// knows a destination whose size and mtime happen to match the source
@@ -164,6 +188,12 @@ func printSyncReport(w io.Writer, rep sync.Report, runErr error) {
// the runs row is stuck in 'running' until manually reconciled.
fmt.Fprintf(w, " warning: failed to record terminal run state: %v\n", rep.FinishErr)
}
+ if rep.SnapshotErr != nil {
+ // Defense-in-depth only: the sync itself succeeded; the index
+ // snapshot or its ride-along did not. Surface it without colouring
+ // the run's status.
+ fmt.Fprintf(w, " warning: index snapshot: %v\n", rep.SnapshotErr)
+ }
if runErr != nil {
fmt.Fprintf(w, " %v\n", runErr)
}
diff --git a/config/backups.go b/config/backups.go
new file mode 100644
index 0000000..060c77c
--- /dev/null
+++ b/config/backups.go
@@ -0,0 +1,89 @@
+package config
+
+import "fmt"
+
+// Backups is the resolved `[backups]` configuration governing the
+// snapshot-on-sync feature (#75): after a successful sync, squirrel takes
+// a VACUUM INTO snapshot of the index to a local tier and — for
+// destination syncs — rides a copy along to the destination bucket so the
+// catalog inherits the same redundancy as the data it describes.
+//
+// Defense-in-depth is the default: an absent `[backups]` table means
+// "enabled with the defaults below", and individual keys override only
+// what they name. Setting Enabled=false disables both halves; Cloud=false
+// keeps the local snapshot but skips the ride-along upload.
+type Backups struct {
+ // Enabled gates the whole feature — the local snapshot-on-sync and,
+ // transitively, the cloud ride-along.
+ Enabled bool
+ // Dir is the local snapshot directory. Empty means the consumer
+ // resolves it to "/backups" (the same sibling directory
+ // the pre-migration and `db backup` snapshots use); the dependency on
+ // the resolved DB path is why the default is applied at the call site
+ // rather than here.
+ Dir string
+ // Keep bounds the local snapshot directory: after writing, the oldest
+ // snapshots are rotated away until at most Keep remain. Zero means no
+ // rotation.
+ Keep int
+ // Cloud gates the destination ride-along. Ignored when Enabled is
+ // false (no snapshot is taken to upload).
+ Cloud bool
+ // CloudKeep bounds each destination's per-volume .squirrel-index/
+ // directory. Zero means no rotation.
+ CloudKeep int
+}
+
+// DefaultBackups returns the zero-config defaults: both halves on, seven
+// snapshots kept on each tier, local directory resolved by the consumer.
+func DefaultBackups() Backups {
+ return Backups{Enabled: true, Dir: "", Keep: 7, Cloud: true, CloudKeep: 7}
+}
+
+// rawBackups is the on-disk shape of the `[backups]` table. Every field is
+// a pointer (or, for Dir, distinguished by emptiness) so resolveBackups
+// can tell "key omitted" from "key set to the zero value" — without that,
+// `enabled = false` would be indistinguishable from a missing key.
+type rawBackups struct {
+ Enabled *bool `toml:"enabled"`
+ Dir string `toml:"dir"`
+ Keep *int `toml:"keep"`
+ Cloud *bool `toml:"cloud"`
+ CloudKeep *int `toml:"cloud_keep"`
+}
+
+// resolveBackups folds an optional `[backups]` table over the defaults. A
+// nil raw (no table) yields DefaultBackups unchanged. Present keys
+// override; Keep and CloudKeep must be non-negative.
+func resolveBackups(raw *rawBackups) (Backups, error) {
+ b := DefaultBackups()
+ if raw == nil {
+ return b, nil
+ }
+ if raw.Enabled != nil {
+ b.Enabled = *raw.Enabled
+ }
+ if raw.Dir != "" {
+ dir, err := expandPath(raw.Dir)
+ if err != nil {
+ return Backups{}, fmt.Errorf("dir: %w", err)
+ }
+ b.Dir = dir
+ }
+ if raw.Keep != nil {
+ if *raw.Keep < 0 {
+ return Backups{}, fmt.Errorf("keep must be non-negative, got %d", *raw.Keep)
+ }
+ b.Keep = *raw.Keep
+ }
+ if raw.Cloud != nil {
+ b.Cloud = *raw.Cloud
+ }
+ if raw.CloudKeep != nil {
+ if *raw.CloudKeep < 0 {
+ return Backups{}, fmt.Errorf("cloud_keep must be non-negative, got %d", *raw.CloudKeep)
+ }
+ b.CloudKeep = *raw.CloudKeep
+ }
+ return b, nil
+}
diff --git a/config/backups_test.go b/config/backups_test.go
new file mode 100644
index 0000000..67846ec
--- /dev/null
+++ b/config/backups_test.go
@@ -0,0 +1,124 @@
+package config
+
+import (
+ "strings"
+ "testing"
+)
+
+// TestBackupsDefaultWhenAbsent: an absent [backups] table resolves to the
+// zero-config defaults — on by default, both halves enabled.
+func TestBackupsDefaultWhenAbsent(t *testing.T) {
+ p := writeConfig(t, `
+[volumes.pics]
+path = "/tmp/pics"
+`)
+ cfg, err := Load(p)
+ if err != nil {
+ t.Fatalf("Load: %v", err)
+ }
+ want := DefaultBackups()
+ if cfg.Backups != want {
+ t.Fatalf("Backups = %+v, want defaults %+v", cfg.Backups, want)
+ }
+}
+
+// TestBackupsOverrides: present keys override the defaults; omitted keys
+// keep their default. dir is expanded to an absolute path.
+func TestBackupsOverrides(t *testing.T) {
+ p := writeConfig(t, `
+[backups]
+keep = 3
+cloud_keep = 10
+dir = "/var/backups/squirrel"
+
+[volumes.pics]
+path = "/tmp/pics"
+`)
+ cfg, err := Load(p)
+ if err != nil {
+ t.Fatalf("Load: %v", err)
+ }
+ b := cfg.Backups
+ if !b.Enabled || !b.Cloud {
+ t.Fatalf("Enabled=%v Cloud=%v, want both true (omitted → default)", b.Enabled, b.Cloud)
+ }
+ if b.Keep != 3 || b.CloudKeep != 10 {
+ t.Fatalf("Keep=%d CloudKeep=%d, want 3/10", b.Keep, b.CloudKeep)
+ }
+ if b.Dir != "/var/backups/squirrel" {
+ t.Fatalf("Dir = %q, want /var/backups/squirrel", b.Dir)
+ }
+}
+
+// TestBackupsDisabled: enabled=false is distinguishable from "omitted"
+// thanks to the pointer field, and turns the whole feature off.
+func TestBackupsDisabled(t *testing.T) {
+ p := writeConfig(t, `
+[backups]
+enabled = false
+
+[volumes.pics]
+path = "/tmp/pics"
+`)
+ cfg, err := Load(p)
+ if err != nil {
+ t.Fatalf("Load: %v", err)
+ }
+ if cfg.Backups.Enabled {
+ t.Fatalf("Enabled = true, want false")
+ }
+ // Cloud keeps its default; Enabled is the master switch the consumer
+ // checks first.
+ if !cfg.Backups.Cloud {
+ t.Fatalf("Cloud = false, want default true (only enabled was set)")
+ }
+}
+
+// TestBackupsCloudDisabled: cloud=false keeps the local snapshot on but
+// turns off the ride-along.
+func TestBackupsCloudDisabled(t *testing.T) {
+ p := writeConfig(t, `
+[backups]
+cloud = false
+
+[volumes.pics]
+path = "/tmp/pics"
+`)
+ cfg, err := Load(p)
+ if err != nil {
+ t.Fatalf("Load: %v", err)
+ }
+ if !cfg.Backups.Enabled {
+ t.Fatalf("Enabled = false, want true")
+ }
+ if cfg.Backups.Cloud {
+ t.Fatalf("Cloud = true, want false")
+ }
+}
+
+func TestBackupsRejectsNegativeKeep(t *testing.T) {
+ p := writeConfig(t, `
+[backups]
+keep = -1
+
+[volumes.pics]
+path = "/tmp/pics"
+`)
+ _, err := Load(p)
+ if err == nil || !strings.Contains(err.Error(), "keep must be non-negative") {
+ t.Fatalf("expected negative-keep error, got %v", err)
+ }
+}
+
+func TestBackupsRejectsUnknownField(t *testing.T) {
+ p := writeConfig(t, `
+[backups]
+nope = true
+
+[volumes.pics]
+path = "/tmp/pics"
+`)
+ if _, err := Load(p); err == nil {
+ t.Fatalf("expected unknown-field error for [backups].nope")
+ }
+}
diff --git a/config/config.go b/config/config.go
index c5eea6c..568a4bb 100644
--- a/config/config.go
+++ b/config/config.go
@@ -58,6 +58,10 @@ type Config struct {
// Agent is non-nil when the config declares an `[agent]` block. The
// agent subcommand requires it; other subcommands ignore it.
Agent *Agent
+ // Backups is the resolved `[backups]` configuration. Always populated:
+ // an absent table resolves to DefaultBackups (snapshot-on-sync on with
+ // sensible defaults).
+ Backups Backups
}
// Volume is one indexable root.
@@ -190,6 +194,7 @@ type rawConfig struct {
Destinations map[string]map[string]any `toml:"destinations"`
Nodes map[string]rawNode `toml:"nodes"`
Agent *rawAgent `toml:"agent"`
+ Backups *rawBackups `toml:"backups"`
}
type rawVolume struct {
@@ -257,6 +262,11 @@ func (r *rawConfig) resolve(path string) (*Config, error) {
}
cfg.Agent = a
}
+ backups, err := resolveBackups(r.Backups)
+ if err != nil {
+ return nil, fmt.Errorf("backups: %w", err)
+ }
+ cfg.Backups = backups
return cfg, nil
}
diff --git a/sync/node.go b/sync/node.go
index aa34442..75b3527 100644
--- a/sync/node.go
+++ b/sync/node.go
@@ -44,6 +44,21 @@ func SyncNode(ctx context.Context, s *store.Store, rcl *Rclone, vol *config.Volu
if err != nil {
return rep, err
}
+ err = runNodeSession(ctx, s, rcl, vol, volID, node, opts, &rep)
+ // runNodeSession's deferred finishRun has committed the run's
+ // terminal state by now, so the snapshot reflects this run's own row.
+ // Peer-sync takes the local snapshot only — there is no ride-along to
+ // peer nodes (dest=nil), and the Snapshotter no-ops on non-terminal
+ // states and dry-run.
+ opts.Snapshot.afterSync(ctx, &rep, vol, nil)
+ return rep, err
+}
+
+// runNodeSession runs the five-phase driver and owns the deferred
+// terminal-state write. It is split out of SyncNode so the deferred
+// finishRun commits before the snapshot-on-sync hook runs — the snapshot
+// must reflect this run's own committed row.
+func runNodeSession(ctx context.Context, s *store.Store, rcl *Rclone, vol *config.Volume, volID int64, node *config.Node, opts Options, rep *Report) (err error) {
rep.Status = store.RunStatusFailed
defer func() {
// Re-derive on the way out: when we never made it past Begin we
@@ -74,9 +89,9 @@ func SyncNode(ctx context.Context, s *store.Store, rcl *Rclone, vol *config.Volu
node: node,
client: client,
opts: opts,
- report: &rep,
+ report: rep,
}
- return rep, driver.run()
+ return driver.run()
}
// nodeSyncDriver drives the five-phase initiator-side flow. The
@@ -427,7 +442,8 @@ func (d *nodeSyncDriver) collectIndexEntries() ([]syncproto.IndexEntry, error) {
func isReservedSyncPath(p string) bool {
return strings.HasPrefix(p, HistoryDirName+"/") ||
strings.HasPrefix(p, ConflictsDirName+"/") ||
- strings.HasPrefix(p, RestoreHistoryDirName+"/")
+ strings.HasPrefix(p, RestoreHistoryDirName+"/") ||
+ strings.HasPrefix(p, IndexDirName+"/")
}
// isReservedFolderPath is the folder-path variant of
@@ -439,7 +455,7 @@ func isReservedSyncPath(p string) bool {
// rejects, aborting the whole walk.
func isReservedFolderPath(p string) bool {
return p == HistoryDirName || p == ConflictsDirName || p == RestoreHistoryDirName ||
- isReservedSyncPath(p)
+ p == IndexDirName || isReservedSyncPath(p)
}
// phaseTransfer invokes rclone exactly once over the transfer +
diff --git a/sync/rclone.go b/sync/rclone.go
index 4644ae2..7f305a6 100644
--- a/sync/rclone.go
+++ b/sync/rclone.go
@@ -314,6 +314,71 @@ func (r *Rclone) RunWithProgress(ctx context.Context, onProgress func(runevents.
return result, nil
}
+// runPlain executes rclone with the wrapper's --config but without the
+// JSON-log and stats flags Run uses, returning captured stdout. It backs
+// the auxiliary commands the snapshot ride-along needs (copyto, lsf,
+// deletefile), where we want a simple exit-code success/failure and, for
+// lsf, a short listing — not the streamed copy stats parseJSONLog builds.
+// Stderr is folded into the error on failure for diagnostics.
+func (r *Rclone) runPlain(ctx context.Context, args ...string) ([]byte, error) {
+ if r.Config == "" {
+ return nil, errors.New("rclone wrapper: Config not set (call WriteRcloneConfig first)")
+ }
+ full := append([]string{"--config", r.Config}, args...)
+ cmd := exec.CommandContext(ctx, r.Binary, full...)
+ var stdout, stderr bytes.Buffer
+ cmd.Stdout = &stdout
+ cmd.Stderr = &stderr
+ if err := cmd.Run(); err != nil {
+ if msg := strings.TrimSpace(stderr.String()); msg != "" {
+ return stdout.Bytes(), fmt.Errorf("rclone %s: %w: %s", args[0], err, msg)
+ }
+ return stdout.Bytes(), fmt.Errorf("rclone %s: %w", args[0], err)
+ }
+ return stdout.Bytes(), nil
+}
+
+// copyTo copies a single source file to a single destination path via
+// `rclone copyto`, creating intermediate directories as needed. Used by
+// the snapshot ride-along to land one .db file at a fixed destination
+// name (copy, by contrast, would treat the destination as a directory).
+func (r *Rclone) copyTo(ctx context.Context, src, dst string) error {
+ _, err := r.runPlain(ctx, "copyto", src, dst)
+ return err
+}
+
+// listSnapshots returns the snapshot filenames directly under dirURI via
+// `rclone lsf`. A missing directory yields an empty list, not an error:
+// the first ride-along to a volume legitimately finds nothing there yet,
+// and rclone reports the absent directory on stderr with a non-zero exit.
+// Only index-* .db entries are returned so an unrelated file in the tree
+// is never a rotation candidate.
+func (r *Rclone) listSnapshots(ctx context.Context, dirURI string) ([]string, error) {
+ out, err := r.runPlain(ctx, "lsf", "--files-only", dirURI)
+ if err != nil {
+ // lsf against a not-yet-created directory exits non-zero; treat an
+ // empty listing as "nothing to rotate" rather than a hard error.
+ if len(bytes.TrimSpace(out)) == 0 {
+ return nil, nil
+ }
+ return nil, err
+ }
+ var names []string
+ for _, line := range strings.Split(string(out), "\n") {
+ name := strings.TrimSpace(line)
+ if strings.HasPrefix(name, snapshotPrefix) && strings.HasSuffix(name, ".db") {
+ names = append(names, name)
+ }
+ }
+ return names, nil
+}
+
+// deleteFile removes a single file at fileURI via `rclone deletefile`.
+func (r *Rclone) deleteFile(ctx context.Context, fileURI string) error {
+ _, err := r.runPlain(ctx, "deletefile", fileURI)
+ return err
+}
+
// rcloneEvent captures the subset of rclone's JSON log we care about: the
// level (for error filtering), the per-object message and object name (for
// failed-file lists), and the stats object that rclone emits at the end of
diff --git a/sync/snapshot.go b/sync/snapshot.go
new file mode 100644
index 0000000..d18bdd8
--- /dev/null
+++ b/sync/snapshot.go
@@ -0,0 +1,254 @@
+package sync
+
+import (
+ "context"
+ "fmt"
+ "os"
+ "path"
+ "path/filepath"
+ "sort"
+ "strings"
+ "sync"
+ "time"
+
+ "github.com/mbertschler/squirrel/config"
+ "github.com/mbertschler/squirrel/store"
+)
+
+// snapshotTimeLayout is the ISO8601-ish, lexically sortable timestamp
+// embedded in snapshot filenames. Millisecond precision so two snapshots
+// taken in the same second (back-to-back CLI invocations, tests) get
+// distinct names without a retry loop. Matches the layout the store and
+// `db backup` already use so one backups/ directory stays consistent.
+const snapshotTimeLayout = "20060102T150405.000Z"
+
+// snapshotPrefix is the filename stem for snapshot-on-sync files. The
+// run id follows so a snapshot is traceable to the exact runs row that
+// produced it, and the leading timestamp keeps the directory lexically
+// (and chronologically) sortable for rotation.
+const snapshotPrefix = "index-"
+
+// Snapshotter coordinates the snapshot-on-sync feature (#75) across the
+// pairs of one `squirrel sync` invocation. It takes at most one VACUUM
+// INTO snapshot — lazily, on the first pair that reaches a terminal
+// success/partial state — and reuses that single file for the local tier
+// and every destination ride-along. Construct one per CLI invocation with
+// NewSnapshotter and pass it via Options.Snapshot; a nil *Snapshotter is
+// the disabled state and every method is a safe no-op on it.
+type Snapshotter struct {
+ store *store.Store
+ rcl *Rclone
+ dir string // resolved local snapshot directory
+ keep int // local rotation bound (0 = no rotation)
+ cloud bool // ride snapshots along to destination buckets
+ cloudKeep int // per-volume .squirrel-index/ rotation bound
+
+ mu sync.Mutex
+ taken bool // the single VACUUM has been attempted
+ localPath string // the snapshot file, "" if the VACUUM failed
+ takeErr error // memoised local-snapshot/rotation error
+}
+
+// SnapshotConfig is the resolved input to NewSnapshotter. The CLI builds
+// it from config.Backups, resolving Dir against the live DB path (an
+// empty config.Backups.Dir means "/backups").
+type SnapshotConfig struct {
+ Dir string
+ Keep int
+ Cloud bool
+ CloudKeep int
+}
+
+// NewSnapshotter returns a Snapshotter ready to be shared across one
+// invocation's pairs. The store backs the VACUUM INTO snapshot; the
+// rclone wrapper (with its Config already written) backs the ride-along.
+func NewSnapshotter(s *store.Store, rcl *Rclone, cfg SnapshotConfig) *Snapshotter {
+ return &Snapshotter{
+ store: s,
+ rcl: rcl,
+ dir: cfg.Dir,
+ keep: cfg.Keep,
+ cloud: cfg.Cloud,
+ cloudKeep: cfg.CloudKeep,
+ }
+}
+
+// afterSync is the post-run hook called by Sync and SyncNode once the
+// run's terminal state is committed. It takes (once) the local snapshot
+// and, for destination syncs with cloud enabled, rides a copy along to
+// the destination. Failures are surfaced on rep.SnapshotErr and never
+// mutate rep.Status — the snapshot is defense-in-depth, not part of the
+// sync's success contract. A nil receiver (feature disabled) is a no-op.
+func (sn *Snapshotter) afterSync(ctx context.Context, rep *Report, vol *config.Volume, dest *config.Destination) {
+ if sn == nil {
+ return
+ }
+ // Only snapshot when the run actually reached a terminal good state
+ // and wrote a row. Dry-run never populates RunID (and the CLI leaves
+ // Snapshot nil for it anyway), so this also guards that path.
+ if rep.RunID == 0 {
+ return
+ }
+ if rep.Status != store.RunStatusSuccess && rep.Status != store.RunStatusPartial {
+ return
+ }
+
+ localPath, err := sn.ensureLocalSnapshot(ctx, rep.RunID)
+ if err != nil {
+ rep.SnapshotErr = err
+ }
+ if localPath == "" {
+ // The VACUUM itself failed; there is nothing to ride along.
+ return
+ }
+ if dest == nil || !sn.cloud {
+ return
+ }
+ if rideErr := sn.rideAlong(ctx, localPath, dest, vol.Name); rideErr != nil {
+ rep.SnapshotErr = rideErr
+ }
+}
+
+// ensureLocalSnapshot takes the single VACUUM INTO snapshot the first
+// time it is called and memoises the result; later pairs reuse the same
+// file (decision #1: one snapshot per invocation, fanned out — never one
+// VACUUM per pair). The returned path is "" only when the VACUUM failed;
+// a non-fatal rotation error is returned alongside a valid path so the
+// ride-along still proceeds.
+func (sn *Snapshotter) ensureLocalSnapshot(ctx context.Context, runID int64) (string, error) {
+ sn.mu.Lock()
+ defer sn.mu.Unlock()
+ if sn.taken {
+ return sn.localPath, sn.takeErr
+ }
+ sn.taken = true
+
+ name := fmt.Sprintf("%s%s-run-%d.db", snapshotPrefix, time.Now().UTC().Format(snapshotTimeLayout), runID)
+ dst := filepath.Join(sn.dir, name)
+ if err := sn.store.Backup(ctx, dst); err != nil {
+ sn.takeErr = fmt.Errorf("snapshot index to %s: %w", dst, err)
+ return "", sn.takeErr
+ }
+ sn.localPath = dst
+ if _, err := rotateSnapshots(sn.dir, sn.keep); err != nil {
+ // The snapshot we just wrote is valid; a rotation hiccup shouldn't
+ // block the ride-along. Record it but keep the path.
+ sn.takeErr = fmt.Errorf("rotate local snapshots in %s: %w", sn.dir, err)
+ }
+ return sn.localPath, sn.takeErr
+}
+
+// rideAlong uploads localPath to //.squirrel-index/ via the
+// rclone wrapper, then rotates that per-volume directory to at most
+// cloudKeep snapshots. The uploaded copy keeps the snapshot's filename so
+// the catalog is traceable to its producing run on the destination too.
+func (sn *Snapshotter) rideAlong(ctx context.Context, localPath string, dest *config.Destination, volumeName string) error {
+ dirURI := indexDirURI(dest, volumeName)
+ name := filepath.Base(localPath)
+ if err := sn.rcl.copyTo(ctx, localPath, dirURI+"/"+name); err != nil {
+ return fmt.Errorf("ride-along upload to %s: %w", dest.Name, err)
+ }
+ if err := sn.rotateCloud(ctx, dirURI); err != nil {
+ return fmt.Errorf("rotate %s/%s/%s: %w", dest.Name, volumeName, IndexDirName, err)
+ }
+ return nil
+}
+
+// rotateCloud lists the destination's .squirrel-index/ directory and
+// deletes the oldest snapshots until at most cloudKeep remain. Snapshots
+// are lexically sortable (decision #3), so "newest N" is the tail of the
+// name-sorted list — no per-file metadata read required. cloudKeep<=0
+// means "no rotation".
+func (sn *Snapshotter) rotateCloud(ctx context.Context, dirURI string) error {
+ if sn.cloudKeep <= 0 {
+ return nil
+ }
+ names, err := sn.rcl.listSnapshots(ctx, dirURI)
+ if err != nil {
+ return err
+ }
+ if len(names) <= sn.cloudKeep {
+ return nil
+ }
+ sort.Strings(names)
+ for _, old := range names[:len(names)-sn.cloudKeep] {
+ if err := sn.rcl.deleteFile(ctx, dirURI+"/"+old); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// indexDirURI returns the rclone URI of the per-volume .squirrel-index/
+// directory under dest, mirroring backupDirURI: an absolute filesystem
+// path for type=local, "://.squirrel-index" otherwise.
+func indexDirURI(dest *config.Destination, volumeName string) string {
+ subpath := path.Join(volumeName, IndexDirName)
+ switch dest.Type {
+ case "local":
+ return filepath.ToSlash(filepath.Join(dest.Root, subpath))
+ default:
+ return dest.Name + ":" + path.Join(dest.Root, subpath)
+ }
+}
+
+// rotateSnapshots deletes the oldest snapshots in dir until only keep
+// remain, mirroring the CLI's `db backup --keep` rotation. Files are
+// matched by the index-/pre-migration- prefixes squirrel writes — the
+// snapshot-on-sync directory defaults to the same backups/ dir the store
+// and CLI use, so both prefixes share the bound. Unknown files are left
+// untouched. keep<=0 means "no rotation".
+func rotateSnapshots(dir string, keep int) ([]string, error) {
+ if keep <= 0 {
+ return nil, nil
+ }
+ entries, err := os.ReadDir(dir)
+ if err != nil {
+ if os.IsNotExist(err) {
+ return nil, nil
+ }
+ return nil, err
+ }
+ type snap struct {
+ name string
+ modTime time.Time
+ }
+ var snaps []snap
+ for _, e := range entries {
+ if e.IsDir() {
+ continue
+ }
+ name := e.Name()
+ if !strings.HasPrefix(name, snapshotPrefix) && !strings.HasPrefix(name, "pre-migration-") {
+ continue
+ }
+ info, err := e.Info()
+ if err != nil {
+ continue
+ }
+ snaps = append(snaps, snap{name: name, modTime: info.ModTime()})
+ }
+ if len(snaps) <= keep {
+ return nil, nil
+ }
+ // Order oldest-first. Break modtime ties by name: filenames embed a
+ // sortable timestamp, so on filesystems with coarse mtime resolution
+ // (or snapshots written within one tick) the name keeps the order
+ // deterministic and chronological — without it, equal modtimes could
+ // rotate away a newer snapshot.
+ sort.Slice(snaps, func(i, j int) bool {
+ if snaps[i].modTime.Equal(snaps[j].modTime) {
+ return snaps[i].name < snaps[j].name
+ }
+ return snaps[i].modTime.Before(snaps[j].modTime)
+ })
+ var removed []string
+ for _, s := range snaps[:len(snaps)-keep] {
+ p := filepath.Join(dir, s.name)
+ if err := os.Remove(p); err != nil {
+ return removed, err
+ }
+ removed = append(removed, p)
+ }
+ return removed, nil
+}
diff --git a/sync/snapshot_test.go b/sync/snapshot_test.go
new file mode 100644
index 0000000..75d9efd
--- /dev/null
+++ b/sync/snapshot_test.go
@@ -0,0 +1,357 @@
+package sync
+
+import (
+ "context"
+ "fmt"
+ "os"
+ "path/filepath"
+ "strings"
+ "testing"
+
+ "github.com/mbertschler/squirrel/config"
+ "github.com/mbertschler/squirrel/index"
+ "github.com/mbertschler/squirrel/store"
+)
+
+// snapshotterFor builds a Snapshotter wired to the fixture's store and
+// rclone wrapper, with the local tier pointed at a fresh temp dir.
+func (f *syncFixture) snapshotterFor(t *testing.T, cfg SnapshotConfig) *Snapshotter {
+ t.Helper()
+ if cfg.Dir == "" {
+ cfg.Dir = t.TempDir()
+ }
+ return NewSnapshotter(f.store, f.rcl, cfg)
+}
+
+func globOne(t *testing.T, pattern string) string {
+ t.Helper()
+ matches, err := filepath.Glob(pattern)
+ if err != nil {
+ t.Fatalf("glob %s: %v", pattern, err)
+ }
+ if len(matches) != 1 {
+ t.Fatalf("glob %s = %d matches, want exactly 1: %v", pattern, len(matches), matches)
+ }
+ return matches[0]
+}
+
+// TestSyncCloudRideAlong is the headline acceptance: after a successful
+// destination sync a snapshot lands under
+// //.squirrel-index/, is the *global* index.db (carries
+// rows for a volume that was never synced to this destination, per
+// decision #1), opens cleanly, and a local-tier copy exists alongside.
+// The filename carries the producing run id.
+func TestSyncCloudRideAlong(t *testing.T) {
+ f := setupFixture(t)
+ if err := os.WriteFile(filepath.Join(f.vol.Path, "a.txt"), []byte("alpha"), 0o644); err != nil {
+ t.Fatal(err)
+ }
+ f.runIndex(t)
+
+ // A second volume indexed into the *same* store but never synced here.
+ // Its rows must still appear in the ride-along snapshot, proving the
+ // payload is the whole catalog rather than a per-volume slice.
+ otherPath := t.TempDir()
+ if err := os.WriteFile(filepath.Join(otherPath, "other.txt"), []byte("o"), 0o644); err != nil {
+ t.Fatal(err)
+ }
+ if _, err := index.Index(context.Background(), f.store, otherPath, index.Options{Name: "other"}); err != nil {
+ t.Fatalf("index other volume: %v", err)
+ }
+
+ localDir := t.TempDir()
+ sn := f.snapshotterFor(t, SnapshotConfig{Dir: localDir, Keep: 7, Cloud: true, CloudKeep: 7})
+ rep, err := Sync(context.Background(), f.store, f.rcl, f.vol, f.dest, Options{Snapshot: sn})
+ if err != nil {
+ t.Fatalf("Sync: %v (rep=%+v)", err, rep)
+ }
+ if rep.Status != store.RunStatusSuccess {
+ t.Fatalf("Status = %q, want success", rep.Status)
+ }
+ if rep.SnapshotErr != nil {
+ t.Fatalf("SnapshotErr = %v, want nil", rep.SnapshotErr)
+ }
+
+ // Cloud snapshot present under the per-volume .squirrel-index dir.
+ cloudSnap := globOne(t, filepath.Join(f.dest.Root, f.vol.Name, IndexDirName, "index-*-run-*.db"))
+ if want := fmt.Sprintf("run-%d.db", rep.RunID); !strings.HasSuffix(cloudSnap, want) {
+ t.Fatalf("cloud snapshot %s does not end with %q", cloudSnap, want)
+ }
+ // Local tier copy present with the same name.
+ localSnap := globOne(t, filepath.Join(localDir, "index-*-run-*.db"))
+ if filepath.Base(localSnap) != filepath.Base(cloudSnap) {
+ t.Fatalf("local %s and cloud %s have different names", filepath.Base(localSnap), filepath.Base(cloudSnap))
+ }
+
+ // Opens cleanly and is the global catalog: the never-synced "other"
+ // volume's rows are present in the destination-side snapshot.
+ snap, err := store.Open(cloudSnap)
+ if err != nil {
+ t.Fatalf("open cloud snapshot: %v", err)
+ }
+ defer snap.Close()
+ rows, err := snap.IntegrityCheck(context.Background())
+ if err != nil {
+ t.Fatalf("snapshot integrity check: %v", err)
+ }
+ if !store.IsIntegrityClean(rows) {
+ t.Fatalf("snapshot integrity = %v, want ok", rows)
+ }
+ other, err := snap.GetVolumeByName(context.Background(), "other")
+ if err != nil {
+ t.Fatalf("snapshot missing 'other' volume (not a global catalog?): %v", err)
+ }
+ paths, err := snap.ListPresentPathsUnder(context.Background(), other.ID)
+ if err != nil {
+ t.Fatalf("list other paths: %v", err)
+ }
+ if _, ok := paths["other.txt"]; !ok {
+ t.Fatalf("snapshot 'other' volume missing other.txt; paths=%v", paths)
+ }
+}
+
+// TestSyncCloudDisabledKeepsLocalSnapshot: with Cloud=false a local
+// snapshot is still taken, but nothing is uploaded to the destination.
+func TestSyncCloudDisabledKeepsLocalSnapshot(t *testing.T) {
+ f := setupFixture(t)
+ if err := os.WriteFile(filepath.Join(f.vol.Path, "a.txt"), []byte("alpha"), 0o644); err != nil {
+ t.Fatal(err)
+ }
+ f.runIndex(t)
+
+ localDir := t.TempDir()
+ sn := f.snapshotterFor(t, SnapshotConfig{Dir: localDir, Keep: 7, Cloud: false, CloudKeep: 7})
+ rep, err := Sync(context.Background(), f.store, f.rcl, f.vol, f.dest, Options{Snapshot: sn})
+ if err != nil {
+ t.Fatalf("Sync: %v", err)
+ }
+ if rep.SnapshotErr != nil {
+ t.Fatalf("SnapshotErr = %v, want nil", rep.SnapshotErr)
+ }
+ if _, err := os.Stat(filepath.Join(localDir)); err != nil {
+ t.Fatalf("local backups dir missing: %v", err)
+ }
+ globOne(t, filepath.Join(localDir, "index-*-run-*.db"))
+ if _, err := os.Stat(filepath.Join(f.dest.Root, f.vol.Name, IndexDirName)); !os.IsNotExist(err) {
+ t.Fatalf("cloud .squirrel-index exists with cloud=false (err=%v)", err)
+ }
+}
+
+// TestSyncNoSnapshotterIsNoOp: a nil Options.Snapshot (feature disabled,
+// e.g. [backups] enabled=false) takes no snapshot at all.
+func TestSyncNoSnapshotterIsNoOp(t *testing.T) {
+ f := setupFixture(t)
+ if err := os.WriteFile(filepath.Join(f.vol.Path, "a.txt"), []byte("alpha"), 0o644); err != nil {
+ t.Fatal(err)
+ }
+ f.runIndex(t)
+
+ rep, err := Sync(context.Background(), f.store, f.rcl, f.vol, f.dest, Options{})
+ if err != nil {
+ t.Fatalf("Sync: %v", err)
+ }
+ if rep.SnapshotErr != nil {
+ t.Fatalf("SnapshotErr = %v, want nil", rep.SnapshotErr)
+ }
+ if _, err := os.Stat(filepath.Join(f.dest.Root, f.vol.Name, IndexDirName)); !os.IsNotExist(err) {
+ t.Fatalf("snapshot taken with no Snapshotter (err=%v)", err)
+ }
+}
+
+// TestSnapshotErrorDoesNotFailSync injects a backup failure (the local
+// snapshot dir's parent is a regular file, so MkdirAll fails) and asserts
+// the sync run's status is unchanged while the failure surfaces on
+// SnapshotErr. Defense-in-depth must never flip a good sync to failed.
+func TestSnapshotErrorDoesNotFailSync(t *testing.T) {
+ f := setupFixture(t)
+ if err := os.WriteFile(filepath.Join(f.vol.Path, "a.txt"), []byte("alpha"), 0o644); err != nil {
+ t.Fatal(err)
+ }
+ f.runIndex(t)
+
+ // Make the snapshot dir un-creatable: its parent is a file.
+ blocker := filepath.Join(t.TempDir(), "not-a-dir")
+ if err := os.WriteFile(blocker, []byte("x"), 0o644); err != nil {
+ t.Fatal(err)
+ }
+ sn := NewSnapshotter(f.store, f.rcl, SnapshotConfig{Dir: filepath.Join(blocker, "backups"), Keep: 7, Cloud: true, CloudKeep: 7})
+
+ rep, err := Sync(context.Background(), f.store, f.rcl, f.vol, f.dest, Options{Snapshot: sn})
+ if err != nil {
+ t.Fatalf("Sync returned error for a snapshot-only failure: %v", err)
+ }
+ if rep.Status != store.RunStatusSuccess {
+ t.Fatalf("Status = %q, want success (snapshot failure must not flip status)", rep.Status)
+ }
+ if rep.SnapshotErr == nil {
+ t.Fatalf("SnapshotErr = nil, want the injected backup failure")
+ }
+ // The runs row itself stays success.
+ run, err := f.store.GetRun(context.Background(), rep.RunID)
+ if err != nil {
+ t.Fatalf("GetRun: %v", err)
+ }
+ if run.Status != store.RunStatusSuccess {
+ t.Fatalf("run row status = %q, want success", run.Status)
+ }
+}
+
+// TestCloudRotationBoundsDir runs several syncs and asserts the
+// destination .squirrel-index/ dir is bounded to cloud_keep snapshots.
+// CloudKeep=2 with three syncs must leave exactly two (the newest).
+func TestCloudRotationBoundsDir(t *testing.T) {
+ f := setupFixture(t)
+ if err := os.WriteFile(filepath.Join(f.vol.Path, "a.txt"), []byte("alpha"), 0o644); err != nil {
+ t.Fatal(err)
+ }
+ f.runIndex(t)
+
+ indexDir := filepath.Join(f.dest.Root, f.vol.Name, IndexDirName)
+ var names []string
+ for i := 0; i < 3; i++ {
+ // A fresh Snapshotter per invocation: that is the real shape (one
+ // per `squirrel sync`), and each takes its own single snapshot.
+ sn := f.snapshotterFor(t, SnapshotConfig{Dir: t.TempDir(), Keep: 7, Cloud: true, CloudKeep: 2})
+ rep, err := Sync(context.Background(), f.store, f.rcl, f.vol, f.dest, Options{Snapshot: sn})
+ if err != nil {
+ t.Fatalf("Sync %d: %v", i, err)
+ }
+ if rep.SnapshotErr != nil {
+ t.Fatalf("Sync %d SnapshotErr: %v", i, rep.SnapshotErr)
+ }
+ names = append(names, filepath.Base(globOneNewest(t, indexDir)))
+ }
+ left, err := filepath.Glob(filepath.Join(indexDir, "index-*-run-*.db"))
+ if err != nil {
+ t.Fatal(err)
+ }
+ if len(left) != 2 {
+ t.Fatalf("cloud dir has %d snapshots, want 2 (cloud_keep): %v", len(left), left)
+ }
+ // The two survivors are the two newest by name (lexically sortable).
+ if !containsBase(left, names[1]) || !containsBase(left, names[2]) {
+ t.Fatalf("survivors %v are not the two newest %v", left, names[1:])
+ }
+}
+
+// globOneNewest returns the lexically-greatest index snapshot in dir.
+func globOneNewest(t *testing.T, dir string) string {
+ t.Helper()
+ matches, err := filepath.Glob(filepath.Join(dir, "index-*-run-*.db"))
+ if err != nil || len(matches) == 0 {
+ t.Fatalf("glob newest in %s: %d matches, err=%v", dir, len(matches), err)
+ }
+ newest := matches[0]
+ for _, m := range matches[1:] {
+ if m > newest {
+ newest = m
+ }
+ }
+ return newest
+}
+
+func containsBase(paths []string, base string) bool {
+ for _, p := range paths {
+ if filepath.Base(p) == base {
+ return true
+ }
+ }
+ return false
+}
+
+// TestLocalRotationBoundsDir exercises rotateSnapshots directly: writing
+// more than keep index-* files and rotating leaves the newest keep.
+func TestLocalRotationBoundsDir(t *testing.T) {
+ dir := t.TempDir()
+ // Distinct, lexically-ordered names; modtime order matches creation.
+ for i := 0; i < 5; i++ {
+ name := fmt.Sprintf("index-2026010%dT000000.000Z-run-%d.db", i, i)
+ if err := os.WriteFile(filepath.Join(dir, name), []byte("x"), 0o644); err != nil {
+ t.Fatal(err)
+ }
+ }
+ // An unrelated file must be left untouched.
+ if err := os.WriteFile(filepath.Join(dir, "keep-me.txt"), []byte("x"), 0o644); err != nil {
+ t.Fatal(err)
+ }
+ removed, err := rotateSnapshots(dir, 2)
+ if err != nil {
+ t.Fatalf("rotateSnapshots: %v", err)
+ }
+ if len(removed) != 3 {
+ t.Fatalf("removed %d, want 3: %v", len(removed), removed)
+ }
+ left, _ := filepath.Glob(filepath.Join(dir, "index-*.db"))
+ if len(left) != 2 {
+ t.Fatalf("index files left = %d, want 2: %v", len(left), left)
+ }
+ if _, err := os.Stat(filepath.Join(dir, "keep-me.txt")); err != nil {
+ t.Fatalf("rotation removed an unrelated file: %v", err)
+ }
+}
+
+// TestSyncFiltersOutIndexDirFromSource is the reserved-path guard for
+// sync: a .squirrel-index dir that incidentally exists in the source
+// volume must not be uploaded.
+func TestSyncFiltersOutIndexDirFromSource(t *testing.T) {
+ f := setupFixture(t)
+ if err := os.MkdirAll(filepath.Join(f.vol.Path, IndexDirName), 0o755); err != nil {
+ t.Fatal(err)
+ }
+ if err := os.WriteFile(filepath.Join(f.vol.Path, IndexDirName, "stale.db"), []byte("nope"), 0o644); err != nil {
+ t.Fatal(err)
+ }
+ if err := os.WriteFile(filepath.Join(f.vol.Path, "a.txt"), []byte("alpha"), 0o644); err != nil {
+ t.Fatal(err)
+ }
+ f.runIndex(t)
+ // No snapshotter here: we are asserting the *data sync* filter, not the
+ // ride-along, so any .squirrel-index at the destination would be from
+ // the source tree leaking through.
+ if _, err := Sync(context.Background(), f.store, f.rcl, f.vol, f.dest, Options{}); err != nil {
+ t.Fatalf("Sync: %v", err)
+ }
+ if _, err := os.Stat(filepath.Join(f.dest.Root, f.vol.Name, IndexDirName, "stale.db")); err == nil {
+ t.Fatalf("source .squirrel-index leaked to destination; should be filtered")
+ }
+}
+
+// TestReservedSyncPathCoversIndexDir documents that the peer-sync
+// reserved-path predicates exclude .squirrel-index, so a node whose index
+// carries rows there never re-publishes them.
+func TestReservedSyncPathCoversIndexDir(t *testing.T) {
+ if !isReservedSyncPath(IndexDirName + "/index-x-run-1.db") {
+ t.Fatalf("isReservedSyncPath does not cover %s/", IndexDirName)
+ }
+ if !isReservedFolderPath(IndexDirName) {
+ t.Fatalf("isReservedFolderPath does not cover bare %s", IndexDirName)
+ }
+}
+
+// TestBuildArgsFilterIndexDir asserts both the sync and restore arg
+// builders carry the .squirrel-index filter.
+func TestBuildArgsFilterIndexDir(t *testing.T) {
+ vol := &config.Volume{Name: "pics", Path: "/tmp/pics"}
+ dest := &config.Destination{Name: "scratch", Type: "local", Root: "/tmp/dst"}
+
+ syncArgs, err := buildRcloneArgs(vol, dest, 1, Options{})
+ if err != nil {
+ t.Fatalf("buildRcloneArgs: %v", err)
+ }
+ if !argsHaveFilter(syncArgs, "- /"+IndexDirName+"/**") {
+ t.Fatalf("sync args missing .squirrel-index filter: %v", syncArgs)
+ }
+ restoreArgs := buildRestoreArgs(vol, dest, 1, RestoreOptions{})
+ if !argsHaveFilter(restoreArgs, "- /"+IndexDirName+"/**") {
+ t.Fatalf("restore args missing .squirrel-index filter: %v", restoreArgs)
+ }
+}
+
+func argsHaveFilter(args []string, want string) bool {
+ for i, a := range args {
+ if a == "--filter" && i+1 < len(args) && args[i+1] == want {
+ return true
+ }
+ }
+ return false
+}
diff --git a/sync/sync.go b/sync/sync.go
index 7fc9504..47d9ace 100644
--- a/sync/sync.go
+++ b/sync/sync.go
@@ -42,6 +42,14 @@ const ConflictsDirName = ".squirrel-conflicts"
// destroyed atomically by rclone copy.
const RestoreHistoryDirName = ".squirrel-restore-history"
+// IndexDirName is the per-volume directory at the destination where the
+// cloud ride-along (#75) lands index snapshots: one VACUUM INTO copy of
+// the global index.db per successful sync, under
+// //.squirrel-index/. Like the other reserved
+// directories it is filtered out of the data transfer (sync and restore)
+// and from peer-sync so a snapshot is never mistaken for user content.
+const IndexDirName = ".squirrel-index"
+
// Options shapes one Sync invocation.
type Options struct {
// Shallow drops --checksum and --hash blake3 so rclone uses its default
@@ -69,6 +77,15 @@ type Options struct {
// must keep it cheap (non-blocking channel send is the canonical
// shape). nil means no-op.
Progress func(runevents.Progress)
+ // Snapshot, if non-nil, drives the snapshot-on-sync feature (#75):
+ // after a run reaches a terminal success/partial state, a single
+ // VACUUM INTO snapshot is taken (lazily, once per CLI invocation) to
+ // the local tier and, for destination syncs, ridden along to the
+ // destination bucket. nil disables the feature for this run (dry-run,
+ // restore, or `[backups] enabled=false`). One Snapshotter is shared
+ // across every pair of one `squirrel sync` so the VACUUM happens once
+ // and fans out.
+ Snapshot *Snapshotter
}
// Report is the outcome of one Sync invocation. Volume and Destination
@@ -107,6 +124,13 @@ type Report struct {
// and the new BLAKE3 plus the receiver-relative preserved path so
// the CLI can render a meaningful "review at " pointer.
NodeConflicts []syncproto.ConflictDetail
+ // SnapshotErr captures a failure to take the post-sync index
+ // snapshot or to ride it along to the destination (#75). It is
+ // strictly defense-in-depth: a snapshot failure must not flip a
+ // successful sync to failed, so it is surfaced here and logged
+ // rather than folded into Status. Nil when no snapshot was attempted
+ // (dry-run, disabled, or a run that didn't reach success/partial).
+ SnapshotErr error
// NodePendingWarnings is the receiver's drift-detection advisory
// from the handshake (#17): one line per audit run on the volume
// since the last successful sync that flipped content
@@ -195,6 +219,11 @@ func Sync(ctx context.Context, s *store.Store, rcl *Rclone, vol *config.Volume,
func(runID int64) ([]string, error) {
return buildRcloneArgs(vol, dest, runID, opts)
})
+ // runRcloneOperation's deferred finishRun has committed the run's
+ // terminal state by now, so the snapshot reflects this run's own row.
+ // Destination syncs are eligible for the cloud ride-along; the
+ // Snapshotter no-ops on dry-run and on non-terminal-success states.
+ opts.Snapshot.afterSync(ctx, &rep, vol, dest)
return rep, err
}
@@ -463,6 +492,12 @@ func buildRcloneArgs(vol *config.Volume, dest *config.Destination, runID int64,
// source volume; filtering it out of sync uploads keeps it
// from leaking onto destinations.
"--filter", "- /" + RestoreHistoryDirName + "/**",
+ // .squirrel-index holds the cloud ride-along snapshots (#75).
+ // It is written by a separate copyto *after* the data transfer,
+ // so filtering it out of the data sync keeps an existing snapshot
+ // dir from being treated as user content (re-uploaded, or pulled
+ // back down on restore).
+ "--filter", "- /" + IndexDirName + "/**",
}
if !opts.Shallow {
args = append(args, "--checksum", "--hash", "blake3")
@@ -793,6 +828,7 @@ func buildRestoreArgs(vol *config.Volume, dest *config.Destination, runID int64,
args = append(args, "--filter", "- /"+HistoryDirName+"/**")
args = append(args, "--filter", "- /"+volmark.MarkerName)
args = append(args, "--filter", "- /"+RestoreHistoryDirName+"/**")
+ args = append(args, "--filter", "- /"+IndexDirName+"/**")
}
if !opts.Shallow {
args = append(args, "--checksum", "--hash", "blake3")