Skip to content
20 changes: 14 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,22 +65,30 @@ A volume can declare a per-volume **hook** — a command the agent runs to nudge

```toml
[volumes.pictures.hook]
command = ["kopia", "snapshot", "create", "."]
timeout = "30m" # optional, defaults to 1h
command = ["kopia", "snapshot", "create", "."]
timeout = "30m" # optional, defaults to 1h
interval = "24h" # optional — also fire on this cadence (see below)
```

The hook fires after a successful index run on the volume (which the agent runs on the `index_every` / `sync_every` cadence). It is **best-effort**: a hook failure or timeout never fails or blocks the run that triggered it, and overlapping invocations for the same volume are skipped rather than stacked. The command receives:
A hook fires on two triggers, both reusing the same command:

- **on change** — after every successful index run on the volume (which the agent runs on the `index_every` / `sync_every` cadence). This answers *"is the latest content backed up?"*. It keys off content settling, not off a sync to a remote, so a volume needs no `sync_to` destination for the hook to be useful.
- **on interval** — every `interval`, *regardless of whether anything changed*. This answers *"is the existing backup still intact?"*. Verification is orthogonal to change — bitrot happens to static data — so re-checks have to run on a clock. Omit `interval` to fire on-change only.

The command tells the two apart via `SQUIRREL_TRIGGER` (so a single command can back up on change and verify on interval). It is **best-effort**: a hook failure or timeout never fails or blocks the run that triggered it, and overlapping invocations for the same volume are skipped rather than stacked. The command receives:

| Variable | Meaning |
|---|---|
| `SQUIRREL_VOLUME` | volume name |
| `SQUIRREL_PATH` | absolute volume path |
| `SQUIRREL_RUN_ID` | the index run that triggered the hook |
| `SQUIRREL_CHANGED` | `true`/`false` — whether the run observed changes (so the command can cheaply no-op) |
| `SQUIRREL_TRIGGER` | `change` |
| `SQUIRREL_RUN_ID` | the index run that triggered the hook (empty on the interval trigger) |
| `SQUIRREL_CHANGED` | `true`/`false` — whether the run observed changes (so the command can cheaply no-op); always `false` on the interval trigger |
| `SQUIRREL_TRIGGER` | `change` or `interval` |

Because the command is exec'd without a shell, the volume path is never string-concatenated into a command line. If you want shell features, make the command `["sh", "-c", "…"]` yourself. Recorded outcomes are visible via `squirrel hooks` and the TUI's Hooks tab.

> **Don't double-schedule verification.** If your external tool already runs its own verify on a timer (e.g. a cron/systemd job), don't *also* set `interval` for a verify command — two heavy passes will step on each other. Pick one driver: let squirrel schedule it (so the result lands in `squirrel hooks` / the TUI) **or** let the tool schedule it (maximum independence — verification keeps happening even when the agent is down), not both.

## Quickstart

Index a configured volume:
Expand Down
6 changes: 1 addition & 5 deletions agent/hooks.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,7 @@ func newHookRunner(s *store.Store, logger *slog.Logger) *hookRunner {
//
// A nil receiver is a no-op so tests can construct a bare scheduler
// without wiring a runner.
//
// trigger is always "change" until the interval caller lands in #86;
// keeping it a parameter keeps the foundation trigger-agnostic, hence the
// nolint until the second caller exercises the other value.
func (h *hookRunner) fire(ctx context.Context, vol *config.Volume, volumeID int64, trigger string, triggeringRunID int64, changed bool) { //nolint:unparam
func (h *hookRunner) fire(ctx context.Context, vol *config.Volume, volumeID int64, trigger string, triggeringRunID int64, changed bool) {
if h == nil || vol.Hook == nil {
return
}
Expand Down
80 changes: 80 additions & 0 deletions agent/hooks_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,3 +208,83 @@ func TestSchedulerFiresChangeHook(t *testing.T) {
t.Fatalf("hook command did not run (marker absent): %v", err)
}
}

// TestSchedulerFiresIntervalHook drives the interval ("check") trigger:
// a volume with only a hook interval (no index/sync cadence) fires the
// command on its cadence regardless of change, and not before. The
// trigger is recorded as 'interval' with no triggering run and
// changed=false.
func TestSchedulerFiresIntervalHook(t *testing.T) {
vol := &config.Volume{
Name: "v",
Hook: &config.VolumeHook{
Command: []string{"sh", "-c", "exit 0"},
Timeout: 5 * time.Second,
Interval: 10 * time.Minute,
},
}
f := newSchedulerFixture(t, vol)
sched := f.scheduler()
ctx := context.Background()

// First tick: no prior interval hook, so it is due.
sched.tick(ctx)
sched.hooks.wait()
assertIntervalHookCount(t, f, 1)

// The interval hook fires independent of indexing — no runs row exists.
runs, err := f.store.ListRuns(ctx, store.ListRunsOpts{})
if err != nil {
t.Fatalf("ListRuns: %v", err)
}
if len(runs) != 0 {
t.Fatalf("interval hook created %d runs rows; want 0 (it never indexes)", len(runs))
}

hooks, err := f.store.ListHookRuns(ctx, store.HookRunListOpts{Descending: true})
if err != nil {
t.Fatalf("ListHookRuns: %v", err)
}
hr := hooks[0]
if hr.Trigger != store.HookTriggerInterval {
t.Fatalf("trigger = %q, want interval", hr.Trigger)
}
if hr.TriggeringRunID.Valid {
t.Fatalf("TriggeringRunID = %v, want NULL for interval hook", hr.TriggeringRunID)
}
if hr.Changed {
t.Fatalf("Changed = true, want false for interval hook")
}
if hr.Status != store.HookStatusSuccess {
t.Fatalf("status = %q, want success", hr.Status)
}

// Under the cadence: a second tick must not re-fire.
f.clock.Add(5 * time.Minute)
sched.tick(ctx)
sched.hooks.wait()
assertIntervalHookCount(t, f, 1)

// Past the cadence: the third tick re-fires.
f.clock.Add(10 * time.Minute)
sched.tick(ctx)
sched.hooks.wait()
assertIntervalHookCount(t, f, 2)
}

func assertIntervalHookCount(t *testing.T, f *schedulerFixture, want int) {
t.Helper()
hooks, err := f.store.ListHookRuns(context.Background(), store.HookRunListOpts{})
if err != nil {
t.Fatalf("ListHookRuns: %v", err)
}
n := 0
for _, h := range hooks {
if h.Trigger == store.HookTriggerInterval {
n++
}
}
if n != want {
t.Fatalf("interval hook runs = %d, want %d", n, want)
}
}
66 changes: 64 additions & 2 deletions agent/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,24 @@ func newScheduler(srv *Server, tickEvery time.Duration, now func() time.Time) *s
// when nothing is scheduled.
func (s *scheduler) anyScheduledVolume() bool {
for _, v := range s.volumes {
if v.SyncEvery > 0 || v.IndexEvery > 0 {
if volumeHasCadence(v) {
return true
}
}
return false
}

// volumeHasCadence reports whether a volume opts into any scheduler-driven
// cadence: a sync, a standalone index, or an interval hook. The interval
// hook counts on its own — a volume can want periodic verification of its
// external backup without any squirrel index/sync schedule.
func volumeHasCadence(v *config.Volume) bool {
if v.SyncEvery > 0 || v.IndexEvery > 0 {
return true
}
return v.Hook != nil && v.Hook.Interval > 0
}

// run is the scheduler's main loop. Returns on ctx cancellation. One
// tick body never overlaps with the next: a slow tick simply drops the
// queued ticker fire (time.Ticker's 1-buffer channel discards values
Expand Down Expand Up @@ -112,7 +123,7 @@ func (s *scheduler) tick(ctx context.Context) {
return
}
v := s.volumes[name]
if v.SyncEvery == 0 && v.IndexEvery == 0 {
if !volumeHasCadence(v) {
continue
}
s.evaluateVolume(ctx, v)
Expand All @@ -133,6 +144,13 @@ func sortedVolumeNames(m map[string]*config.Volume) []string {
// indexes immediately before pushing; if that pre-sync index ran, it
// satisfies the standalone-index cadence too (both write kind='index'
// rows the same way) and the standalone branch naturally skips.
//
// The interval hook is evaluated last, after any index/sync this tick.
// That ordering means a tick that re-indexed the source (verifying HDD1)
// also fires the external check (verifying HDD2) right after — verify the
// source, then verify the backup — without coupling the two cadences:
// each runs on its own clock and the interval hook fires regardless of
// whether an index ran.
func (s *scheduler) evaluateVolume(ctx context.Context, vol *config.Volume) {
v, err := s.resolveVolume(ctx, vol.Name, vol.Path)
if err != nil {
Expand All @@ -144,6 +162,50 @@ func (s *scheduler) evaluateVolume(ctx context.Context, vol *config.Volume) {
if !syncFired && vol.IndexEvery > 0 {
s.maybeRunIndex(ctx, vol, v.ID, "standalone", vol.IndexEvery)
}
s.maybeFireIntervalHook(ctx, vol, v.ID)
}

// maybeFireIntervalHook fires the volume's hook on its interval cadence
// (#86), independent of any change. It is the time-driven counterpart to
// the on-change fire in executeIndex: verification is orthogonal to change
// (bitrot hits static data), so it must run on a clock. The fire reuses
// the foundation's exec/env/best-effort/don't-stack/timeout path with the
// trigger set to interval; SQUIRREL_CHANGED is always false here (no run
// observed anything) and there is no triggering run.
func (s *scheduler) maybeFireIntervalHook(ctx context.Context, vol *config.Volume, volumeID int64) {
if vol.Hook == nil || vol.Hook.Interval <= 0 {
return
}
due, err := s.intervalHookDue(ctx, volumeID, vol.Hook.Interval)
if err != nil {
s.logger.Error("scheduler.error",
"kind", "hook", "volume", vol.Name, "err", err.Error())
return
}
if !due {
return
}
s.hooks.fire(ctx, vol, volumeID, store.HookTriggerInterval, 0, false)
}

// intervalHookDue reports whether `now - last_interval_hook >= cadence`
// for the volume. A volume with no interval hook run yet is always due.
// The last run's end timestamp anchors the cadence (falling back to its
// start, mirroring elapsedSince); a still-running hook is handled by the
// don't-stack guard inside fire, not here.
func (s *scheduler) intervalHookDue(ctx context.Context, volumeID int64, cadence time.Duration) (bool, error) {
r, err := s.store.LatestHookRun(ctx, volumeID, store.HookTriggerInterval)
if err != nil {
if store.IsNotFound(err) {
return true, nil
}
return false, fmt.Errorf("lookup last interval hook: %w", err)
}
ts := r.StartedAtNs
if r.EndedAtNs.Valid {
ts = r.EndedAtNs.Int64
}
return s.now().Sub(time.Unix(0, ts)) >= cadence, nil
}

// resolveVolume looks up (or creates) the volume row by name. Mirrors
Expand Down
22 changes: 18 additions & 4 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,14 @@ type VolumeHook struct {
// Timeout bounds one invocation so a hung hook can't wedge the agent's
// scheduler. Zero is replaced with DefaultHookTimeout at load time.
Timeout time.Duration
// Interval is the cadence for the interval ("check") trigger: the
// agent fires the same command on this period regardless of whether
// content changed (the motivating use is periodic backup
// verification, where bitrot happens to static data and so must be
// re-checked on a clock, not on an event). Zero means "no interval
// firing" — the hook then fires only on-change. The command tells the
// two triggers apart via SQUIRREL_TRIGGER (change vs interval).
Interval time.Duration
}

// DefaultHookTimeout is the per-invocation timeout applied when a hook
Expand Down Expand Up @@ -193,8 +201,9 @@ type rawVolume struct {
}

type rawVolumeHook struct {
Command []string `toml:"command"`
Timeout string `toml:"timeout"`
Command []string `toml:"command"`
Timeout string `toml:"timeout"`
Interval string `toml:"interval"`
}

func (r *rawConfig) resolve(path string) (*Config, error) {
Expand Down Expand Up @@ -328,9 +337,14 @@ func resolveVolumeHook(raw *rawVolumeHook) (*VolumeHook, error) {
}
timeout = dur
}
interval, err := parseVolumeCadence("hook.interval", raw.Interval)
if err != nil {
return nil, err
}
return &VolumeHook{
Command: append([]string(nil), raw.Command...),
Timeout: timeout,
Command: append([]string(nil), raw.Command...),
Timeout: timeout,
Interval: interval,
}, nil
}

Expand Down
65 changes: 65 additions & 0 deletions config/hook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,71 @@ command = ["backup.sh"]
}
}

func TestLoadVolumeHookInterval(t *testing.T) {
p := writeConfig(t, `
[volumes.photos]
path = "/tmp/photos"

[volumes.photos.hook]
command = ["kopia", "snapshot", "verify"]
interval = "24h"
`)
cfg, err := Load(p)
if err != nil {
t.Fatalf("Load: %v", err)
}
if got := cfg.Volumes["photos"].Hook.Interval; got != 24*time.Hour {
t.Fatalf("Interval = %s, want 24h", got)
}
}

func TestLoadVolumeHookIntervalDefaultsZero(t *testing.T) {
p := writeConfig(t, `
[volumes.photos]
path = "/tmp/photos"

[volumes.photos.hook]
command = ["backup.sh"]
`)
cfg, err := Load(p)
if err != nil {
t.Fatalf("Load: %v", err)
}
if got := cfg.Volumes["photos"].Hook.Interval; got != 0 {
t.Fatalf("Interval = %s, want 0 (no interval firing)", got)
}
}

func TestLoadVolumeHookIntervalErrors(t *testing.T) {
cases := []struct {
name string
body string
want string
}{
{
name: "bad interval",
body: "[volumes.v]\npath=\"/tmp/v\"\n[volumes.v.hook]\ncommand=[\"x\"]\ninterval=\"nope\"\n",
want: "hook.interval",
},
{
name: "below floor",
body: "[volumes.v]\npath=\"/tmp/v\"\n[volumes.v.hook]\ncommand=[\"x\"]\ninterval=\"10ms\"\n",
want: "hook.interval must be at least",
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
_, err := Load(writeConfig(t, tc.body))
if err == nil {
t.Fatalf("expected error containing %q, got nil", tc.want)
}
if !strings.Contains(err.Error(), tc.want) {
t.Fatalf("error = %v, want substring %q", err, tc.want)
}
})
}
}

func TestLoadVolumeHookNoBlock(t *testing.T) {
p := writeConfig(t, `
[volumes.photos]
Expand Down
22 changes: 22 additions & 0 deletions store/hookruns.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,3 +173,25 @@ func (s *Store) ListHookRuns(ctx context.Context, opts HookRunListOpts) ([]HookR
}
return queryRows(ctx, s.db, query, scanHookRunRow, args...)
}

// LatestHookRun returns the most recent hook run for the given (volume,
// trigger), or sql.ErrNoRows when none exists. The interval-hook scheduler
// computes `now - last_run` from this row to decide whether a
// cadence-driven check is due; a still-running row counts (its
// started_at_ns anchors the cadence, and the don't-stack guard prevents a
// second concurrent invocation regardless).
//
// Ordering is by started_at_ns so the idx_hook_runs_volume_trigger index
// (volume_id, trigger, started_at_ns) satisfies both the filter and the
// sort — no separate sort pass as hook history grows. id breaks ties
// deterministically and is the index's implicit trailing rowid, so it
// stays index-only.
func (s *Store) LatestHookRun(ctx context.Context, volumeID int64, trigger string) (HookRun, error) {
row := s.db.QueryRowContext(ctx, `
SELECT `+hookRunColumns+`
FROM hook_runs
WHERE volume_id = ? AND trigger = ?
ORDER BY started_at_ns DESC, id DESC LIMIT 1
`, volumeID, trigger)
Comment on lines +191 to +195
return scanHookRun(row.Scan)
}
Loading
Loading