diff --git a/README.md b/README.md index b4b1a25..18a6049 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,28 @@ Supported destination types: `local`, `sftp`, `s3`, `b2`, `gcs`. Secrets accept Squirrel writes its own `rclone.conf` next to the config (`~/.squirrel/rclone.conf`, mode 0600) on every sync invocation. You do not run `rclone config` and you should not edit `rclone.conf` by hand. +### Hooks + +A volume can declare a per-volume **hook** — a command the agent runs to nudge an external tool when the volume's content changes. squirrel stays tool-agnostic: it never learns what the command does (a backup with kopia/restic, an `rclone copy`, a shell script — all the same to squirrel). It exec's the command **without a shell**, passes context through environment variables, and records only the generic outcome (exit code, timestamps). + +```toml +[volumes.pictures.hook] +command = ["kopia", "snapshot", "create", "."] +timeout = "30m" # optional, defaults to 1h +``` + +The hook fires after a successful index run on the volume (which the agent runs on the `index_every` / `sync_every` cadence). It is **best-effort**: a hook failure or timeout never fails or blocks the run that triggered it, and overlapping invocations for the same volume are skipped rather than stacked. The command receives: + +| Variable | Meaning | +|---|---| +| `SQUIRREL_VOLUME` | volume name | +| `SQUIRREL_PATH` | absolute volume path | +| `SQUIRREL_RUN_ID` | the index run that triggered the hook | +| `SQUIRREL_CHANGED` | `true`/`false` — whether the run observed changes (so the command can cheaply no-op) | +| `SQUIRREL_TRIGGER` | `change` | + +Because the command is exec'd without a shell, the volume path is never string-concatenated into a command line. If you want shell features, make the command `["sh", "-c", "…"]` yourself. Recorded outcomes are visible via `squirrel hooks` and the TUI's Hooks tab. + ## Quickstart Index a configured volume: diff --git a/agent/hooks.go b/agent/hooks.go new file mode 100644 index 0000000..9287a24 --- /dev/null +++ b/agent/hooks.go @@ -0,0 +1,186 @@ +package agent + +import ( + "context" + "database/sql" + "fmt" + "log/slog" + "strings" + "sync" + "time" + + "github.com/mbertschler/squirrel/config" + "github.com/mbertschler/squirrel/hook" + "github.com/mbertschler/squirrel/store" +) + +// hookRunner owns the lifecycle of per-volume external-tool hooks (#84): +// the don't-stack guard, the spawn/bound/reap goroutine, and the generic +// outcome recording. One per scheduler. All firing is best-effort — every +// failure is logged via the agent logger and never propagated, so a hook +// can neither fail nor block the run that triggered it. +// +// Hooks run in their own goroutine (not on the scheduler tick) so a +// long-running or wedged command can't stall cadence evaluation; the tick +// only ever pays for the synchronous BeginHookRun insert. wait() lets the +// scheduler drain in-flight hooks on shutdown. +type hookRunner struct { + store *store.Store + logger *slog.Logger + + mu sync.Mutex + running map[int64]struct{} // volume ids with an in-flight invocation + wg sync.WaitGroup +} + +func newHookRunner(s *store.Store, logger *slog.Logger) *hookRunner { + return &hookRunner{ + store: s, + logger: logger, + running: make(map[int64]struct{}), + } +} + +// fire launches the volume's hook for the given trigger if one is +// configured and no invocation is already in flight for that volume. It +// returns immediately: the command runs in a tracked goroutine bounded by +// the hook's timeout and by ctx (agent shutdown). triggeringRunID is the +// index run that fired an on-change hook (zero for interval hooks); +// changed is the SQUIRREL_CHANGED value to pass. +// +// A nil receiver is a no-op so tests can construct a bare scheduler +// without wiring a runner. +// +// trigger is always "change" until the interval caller lands in #86; +// keeping it a parameter keeps the foundation trigger-agnostic, hence the +// nolint until the second caller exercises the other value. +func (h *hookRunner) fire(ctx context.Context, vol *config.Volume, volumeID int64, trigger string, triggeringRunID int64, changed bool) { //nolint:unparam + if h == nil || vol.Hook == nil { + return + } + if !h.tryStart(volumeID) { + // Don't stack: a previous invocation for this volume is still + // running. The next trigger (or the external tool's own schedule) + // catches up — skipping is the specified behaviour, not an error. + h.logger.Info("hook.skipped", + "volume", vol.Name, "trigger", trigger, + "reason", "previous invocation still running") + return + } + id, err := h.store.BeginHookRun(ctx, store.HookRunSpec{ + VolumeID: volumeID, + Trigger: trigger, + TriggeringRunID: triggeringRunID, + Changed: changed, + }) + if err != nil { + h.logger.Error("hook.error", + "volume", vol.Name, "trigger", trigger, + "err", fmt.Sprintf("begin hook run: %v", err)) + h.done(volumeID) + return + } + h.logger.Info("hook.kicked", + "volume", vol.Name, "trigger", trigger, + "hook_run_id", id, "run_id", triggeringRunID, "changed", changed) + + h.wg.Add(1) + go func() { + defer h.wg.Done() + defer h.done(volumeID) + h.execute(ctx, vol, id, trigger, triggeringRunID, changed) + }() +} + +// execute runs the command, then records the generic outcome. It runs on +// the hook goroutine; the recording uses a detached context so the outcome +// still lands even when ctx was cancelled by agent shutdown (which is what +// killed the command in the first place). +func (h *hookRunner) execute(ctx context.Context, vol *config.Volume, hookRunID int64, trigger string, triggeringRunID int64, changed bool) { + outcome := hook.Run(ctx, hook.Spec{ + Command: vol.Hook.Command, + Volume: vol.Name, + Path: vol.Path, + RunID: triggeringRunID, + Changed: changed, + Trigger: hook.Trigger(trigger), + Timeout: vol.Hook.Timeout, + }) + status, exitCode, errMsg := classifyOutcome(outcome) + + finishCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if err := h.store.FinishHookRun(finishCtx, hookRunID, status, exitCode, errMsg); err != nil { + h.logger.Error("hook.error", + "volume", vol.Name, "trigger", trigger, "hook_run_id", hookRunID, + "err", fmt.Sprintf("finish hook run: %v", err)) + } + h.logFinished(vol.Name, trigger, hookRunID, outcome, status) +} + +// logFinished emits the terminal hook log line. Failures additionally log +// at error level with the diagnostic so an operator tailing the agent sees +// them without scanning the hook_runs table. +func (h *hookRunner) logFinished(volume, trigger string, hookRunID int64, outcome hook.Outcome, status string) { + duration := time.Duration(outcome.EndedAtNs - outcome.StartedAtNs) + attrs := []any{ + "volume", volume, "trigger", trigger, "hook_run_id", hookRunID, + "status", status, "duration_ms", duration.Milliseconds(), + } + if outcome.HasExitCode { + attrs = append(attrs, "exit_code", outcome.ExitCode) + } + if outcome.TimedOut { + attrs = append(attrs, "timed_out", true) + } + h.logger.Info("hook.finished", attrs...) + if !outcome.Succeeded() { + h.logger.Error("hook.error", + "volume", volume, "trigger", trigger, "hook_run_id", hookRunID, + "err", outcome.Err.Error()) + } +} + +// classifyOutcome maps a hook.Outcome onto the store's generic columns. A +// process that produced an exit code records it (even on failure); a +// timeout or spawn failure leaves exit_code NULL. The error message folds +// in a stderr tail so the recorded row explains the failure on its own. +func classifyOutcome(outcome hook.Outcome) (status string, exitCode sql.NullInt64, errMsg string) { + if outcome.HasExitCode { + exitCode = sql.NullInt64{Int64: int64(outcome.ExitCode), Valid: true} + } + if outcome.Succeeded() { + return store.HookStatusSuccess, exitCode, "" + } + msg := outcome.Err.Error() + if tail := strings.TrimSpace(outcome.Stderr); tail != "" { + msg = msg + ": " + tail + } + return store.HookStatusFailed, exitCode, msg +} + +func (h *hookRunner) tryStart(volumeID int64) bool { + h.mu.Lock() + defer h.mu.Unlock() + if _, ok := h.running[volumeID]; ok { + return false + } + h.running[volumeID] = struct{}{} + return true +} + +func (h *hookRunner) done(volumeID int64) { + h.mu.Lock() + delete(h.running, volumeID) + h.mu.Unlock() +} + +// wait blocks until every in-flight hook goroutine has finished. The +// scheduler calls it on shutdown; because hooks are timeout-bounded and +// ctx cancellation kills the command, it returns promptly. +func (h *hookRunner) wait() { + if h == nil { + return + } + h.wg.Wait() +} diff --git a/agent/hooks_test.go b/agent/hooks_test.go new file mode 100644 index 0000000..9b67f79 --- /dev/null +++ b/agent/hooks_test.go @@ -0,0 +1,210 @@ +package agent + +import ( + "bytes" + "context" + "log/slog" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/mbertschler/squirrel/config" + "github.com/mbertschler/squirrel/store" +) + +// newHookFixture opens a store with one volume row and returns a hookRunner +// wired to a captured logger. Kept local to the hook tests so they don't +// lean on the scheduler fixture's clock/sync machinery they don't need. +func newHookFixture(t *testing.T) (*hookRunner, *store.Store, *bytes.Buffer, store.Volume) { + t.Helper() + s, err := store.Open(filepath.Join(t.TempDir(), "index.db")) + if err != nil { + t.Fatalf("store.Open: %v", err) + } + t.Cleanup(func() { _ = s.Close() }) + vol, err := s.CreateVolume(context.Background(), "v", t.TempDir()) + if err != nil { + t.Fatalf("CreateVolume: %v", err) + } + buf := &bytes.Buffer{} + logger := slog.New(slog.NewTextHandler(buf, &slog.HandlerOptions{Level: slog.LevelDebug})) + return newHookRunner(s, logger), s, buf, vol +} + +func hookVol(cmd []string) *config.Volume { + return &config.Volume{ + Name: "v", + Path: "/tmp/v", + Hook: &config.VolumeHook{Command: cmd, Timeout: 5 * time.Second}, + } +} + +func TestHookRunnerFireSuccess(t *testing.T) { + h, s, _, vol := newHookFixture(t) + ctx := context.Background() + // A change hook's triggering run must exist — the column is an FK. + runID, err := s.BeginIndexRun(ctx, store.RunKindIndex, vol.ID, true) + if err != nil { + t.Fatalf("BeginIndexRun: %v", err) + } + + h.fire(ctx, hookVol([]string{"sh", "-c", "exit 0"}), vol.ID, store.HookTriggerChange, runID, true) + h.wait() + + runs, err := s.ListHookRuns(ctx, store.HookRunListOpts{}) + if err != nil { + t.Fatalf("ListHookRuns: %v", err) + } + if len(runs) != 1 { + t.Fatalf("hook runs = %d, want 1", len(runs)) + } + r := runs[0] + if r.Status != store.HookStatusSuccess { + t.Fatalf("status = %q, want success", r.Status) + } + if !r.ExitCode.Valid || r.ExitCode.Int64 != 0 { + t.Fatalf("ExitCode = %v, want 0", r.ExitCode) + } + if r.Trigger != store.HookTriggerChange { + t.Fatalf("trigger = %q, want change", r.Trigger) + } + if !r.TriggeringRunID.Valid || r.TriggeringRunID.Int64 != runID { + t.Fatalf("TriggeringRunID = %v, want %d", r.TriggeringRunID, runID) + } + if !r.Changed { + t.Fatalf("Changed = false, want true") + } +} + +func TestHookRunnerFireFailureRecorded(t *testing.T) { + h, s, _, vol := newHookFixture(t) + ctx := context.Background() + runID, err := s.BeginIndexRun(ctx, store.RunKindIndex, vol.ID, true) + if err != nil { + t.Fatalf("BeginIndexRun: %v", err) + } + + h.fire(ctx, hookVol([]string{"sh", "-c", "echo boom 1>&2; exit 3"}), vol.ID, store.HookTriggerChange, runID, true) + h.wait() + + runs, _ := s.ListHookRuns(ctx, store.HookRunListOpts{}) + if len(runs) != 1 { + t.Fatalf("hook runs = %d, want 1", len(runs)) + } + r := runs[0] + if r.Status != store.HookStatusFailed { + t.Fatalf("status = %q, want failed", r.Status) + } + if !r.ExitCode.Valid || r.ExitCode.Int64 != 3 { + t.Fatalf("ExitCode = %v, want 3", r.ExitCode) + } + if !r.Error.Valid || !strings.Contains(r.Error.String, "boom") { + t.Fatalf("Error = %v, want it to carry the stderr tail", r.Error) + } +} + +func TestHookRunnerNoHookConfigured(t *testing.T) { + h, s, _, vol := newHookFixture(t) + ctx := context.Background() + // A volume with no [hook] block must never record a hook run. + h.fire(ctx, &config.Volume{Name: "v", Path: "/tmp/v"}, vol.ID, store.HookTriggerChange, 1, true) + h.wait() + runs, _ := s.ListHookRuns(ctx, store.HookRunListOpts{}) + if len(runs) != 0 { + t.Fatalf("hook runs = %d, want 0 when no hook is configured", len(runs)) + } +} + +func TestHookRunnerDontStack(t *testing.T) { + h, s, buf, vol := newHookFixture(t) + ctx := context.Background() + + // Simulate an in-flight invocation by holding the guard, then fire: + // the new invocation must be skipped, not stacked. + if !h.tryStart(vol.ID) { + t.Fatalf("tryStart returned false on a fresh volume") + } + h.fire(ctx, hookVol([]string{"sh", "-c", "exit 0"}), vol.ID, store.HookTriggerChange, 1, true) + h.wait() + h.done(vol.ID) + + runs, _ := s.ListHookRuns(ctx, store.HookRunListOpts{}) + if len(runs) != 0 { + t.Fatalf("hook runs = %d, want 0 — a stacked invocation must be skipped", len(runs)) + } + if !strings.Contains(buf.String(), "hook.skipped") { + t.Fatalf("expected a hook.skipped log line, got:\n%s", buf.String()) + } +} + +func TestHookGuardReleases(t *testing.T) { + h, _, _, vol := newHookFixture(t) + if !h.tryStart(vol.ID) { + t.Fatalf("first tryStart = false") + } + if h.tryStart(vol.ID) { + t.Fatalf("second tryStart = true, want false while in flight") + } + h.done(vol.ID) + if !h.tryStart(vol.ID) { + t.Fatalf("tryStart after done = false, want true") + } +} + +// TestSchedulerFiresChangeHook drives the real tick path: a standalone +// index run on a volume with a hook must record an on-change hook tied to +// that index run, and the index run itself must still report success (the +// hook is best-effort and never affects it). +func TestSchedulerFiresChangeHook(t *testing.T) { + marker := filepath.Join(t.TempDir(), "fired") + vol := &config.Volume{ + Name: "v", + IndexEvery: time.Minute, + Hook: &config.VolumeHook{ + // Touch a marker so we can prove the command actually ran. The + // marker path is passed as an argument ($1), not concatenated + // into the script, so a temp dir with spaces/metacharacters + // can't break the command. + Command: []string{"sh", "-c", `echo x > "$1"`, "sh", marker}, + Timeout: 5 * time.Second, + }, + } + f := newSchedulerFixture(t, vol) + f.seedFile() + sched := f.scheduler() + ctx := context.Background() + + sched.tick(ctx) + sched.hooks.wait() + + hooks, err := f.store.ListHookRuns(ctx, store.HookRunListOpts{}) + if err != nil { + t.Fatalf("ListHookRuns: %v", err) + } + if len(hooks) != 1 { + t.Fatalf("hook runs = %d, want 1 after a change-observing index", len(hooks)) + } + hr := hooks[0] + if hr.Trigger != store.HookTriggerChange { + t.Fatalf("trigger = %q, want change", hr.Trigger) + } + if hr.Status != store.HookStatusSuccess { + t.Fatalf("hook status = %q, want success", hr.Status) + } + if !hr.Changed { + t.Fatalf("Changed = false, want true (the seeded file is an addition)") + } + // The hook's triggering run must be the index run the tick produced. + idx, err := f.store.LatestSuccessfulIndexRun(ctx, hr.VolumeID) + if err != nil { + t.Fatalf("LatestSuccessfulIndexRun: %v", err) + } + if !hr.TriggeringRunID.Valid || hr.TriggeringRunID.Int64 != idx.ID { + t.Fatalf("TriggeringRunID = %v, want index run %d", hr.TriggeringRunID, idx.ID) + } + if _, err := os.Stat(marker); err != nil { + t.Fatalf("hook command did not run (marker absent): %v", err) + } +} diff --git a/agent/scheduler.go b/agent/scheduler.go index 1da92fe..0c282aa 100644 --- a/agent/scheduler.go +++ b/agent/scheduler.go @@ -35,6 +35,10 @@ type scheduler struct { locks lockHolder tickEvery time.Duration now func() time.Time + // hooks runs the per-volume external-tool hooks (#84). May be nil in + // tests that construct a bare scheduler; hookRunner methods tolerate a + // nil receiver so the firing sites need no extra guard. + hooks *hookRunner } // lockHolder is the subset of the peer-sync router the scheduler uses @@ -61,6 +65,7 @@ func newScheduler(srv *Server, tickEvery time.Duration, now func() time.Time) *s locks: srv.router, tickEvery: tickEvery, now: now, + hooks: newHookRunner(srv.store, srv.cfg.Logger), } } @@ -87,6 +92,9 @@ func (s *scheduler) run(ctx context.Context) { for { select { case <-ctx.Done(): + // Drain in-flight hooks before returning so Serve's shutdown + // wait doesn't race a hook goroutine writing its outcome. + s.hooks.wait() return case <-t.C: s.tick(ctx) @@ -326,9 +334,29 @@ func (s *scheduler) executeIndex(ctx context.Context, vol *config.Volume, volume "kind", "index", "volume", vol.Name, "run_id", rep.RunID, "err", err.Error()) } + if ok { + s.fireChangeHook(ctx, vol, volumeID, rep) + } return ok } +// fireChangeHook nudges the volume's external-tool hook after a successful +// index run (#85). Per the issue's lean default it fires on every +// completed run (success or partial) and passes SQUIRREL_CHANGED so the +// consumer can cheaply no-op when nothing moved — keying off "content +// settled" rather than off a sync to a remote, since a volume need not +// have any sync_to destination for the hook to be useful. The fire is +// best-effort and asynchronous, so it never affects the run that +// triggered it. +// +// "Changed" counts additions, modifications, and disappearances: a file +// going missing is as much a content change the backup should capture as +// a new or rewritten one. +func (s *scheduler) fireChangeHook(ctx context.Context, vol *config.Volume, volumeID int64, rep index.Report) { + changed := rep.Added+rep.Modified+rep.Missing > 0 + s.hooks.fire(ctx, vol, volumeID, store.HookTriggerChange, rep.RunID, changed) +} + // recordFailedIndex inserts and immediately finishes a failed // kind='index' run for volumeID, carrying runErr as the row's error // message. Used when index.Index couldn't even allocate its own diff --git a/agent/scheduler_test.go b/agent/scheduler_test.go index 5d9d8ff..9a4a8d6 100644 --- a/agent/scheduler_test.go +++ b/agent/scheduler_test.go @@ -179,6 +179,7 @@ func (f *schedulerFixture) scheduler() *scheduler { locks: f.srv.router, tickEvery: time.Second, now: f.clock.Now, + hooks: newHookRunner(f.srv.store, f.srv.cfg.Logger), } } diff --git a/agent/sync_test.go b/agent/sync_test.go index 23a920d..9f01dcd 100644 --- a/agent/sync_test.go +++ b/agent/sync_test.go @@ -64,7 +64,7 @@ func newPreStageFixture(t *testing.T) *preStageFixture { } // Watermark at/above the prior run's correlated id so the supersede // branch of dispositionForExisting fires for peer-sourced rows. - if err := srv.store.UpsertPeerSyncState(ctx, v.ID, peer.ID, correlated); err != nil { + if err := srv.store.UpsertPeerSyncState(ctx, v.ID, peer.ID, correlated, false); err != nil { t.Fatalf("UpsertPeerSyncState: %v", err) } recvRun, err := srv.store.BeginPeerSyncRun(ctx, v.ID, peer.ID, correlated+1, "peerA") diff --git a/cmd/squirrel/hooks.go b/cmd/squirrel/hooks.go new file mode 100644 index 0000000..a5ee008 --- /dev/null +++ b/cmd/squirrel/hooks.go @@ -0,0 +1,115 @@ +package main + +import ( + "database/sql" + "fmt" + "io" + "text/tabwriter" + + "github.com/spf13/cobra" + + "github.com/mbertschler/squirrel/store" +) + +// newHooksCmd returns the `squirrel hooks` cobra command. It lists rows +// from the hook_runs table — the generic outcome of each external-tool +// hook squirrel fired on a change or interval trigger (#84) — most recent +// first, with optional volume filtering and a configurable cap. squirrel +// records pass/fail and an exit code only; it never interprets what the +// command did, so the listing is deliberately tool-agnostic. +func newHooksCmd() *cobra.Command { + var ( + volumeName string + limit int + ) + cmd := &cobra.Command{ + Use: "hooks", + Short: "List external-tool hook runs (most recent first)", + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, _ []string) error { + return runHooks(cmd, volumeName, limit) + }, + } + cmd.Flags().StringVar(&volumeName, "volume", "", "filter to hooks for this volume name") + cmd.Flags().IntVar(&limit, "limit", 20, "maximum number of hook runs to show (0 for no limit)") + return cmd +} + +func runHooks(cmd *cobra.Command, volumeName string, limit int) error { + cfg, err := tryLoadConfig(cmd) + if err != nil { + return err + } + s, err := openStore(cmd, cfg) + if err != nil { + return err + } + defer s.Close() + + opts := store.HookRunListOpts{Limit: limit, Descending: true} + if volumeName != "" { + v, err := s.GetVolumeByName(cmd.Context(), volumeName) + if err != nil { + if store.IsNotFound(err) { + return fmt.Errorf("no volume named %q", volumeName) + } + return fmt.Errorf("lookup volume %q: %w", volumeName, err) + } + opts.VolumeID = &v.ID + } + hooks, err := s.ListHookRuns(cmd.Context(), opts) + if err != nil { + return err + } + volumes, err := loadVolumeNames(cmd, s) + if err != nil { + return err + } + return printHookRuns(cmd.OutOrStdout(), hooks, volumes) +} + +func printHookRuns(out io.Writer, hooks []store.HookRun, volumes map[int64]string) error { + tw := tabwriter.NewWriter(out, 0, 0, 2, ' ', 0) + fmt.Fprintln(tw, "ID\tVOLUME\tTRIGGER\tRUN\tCHANGED\tSTARTED\tDURATION\tSTATUS\tEXIT\tERROR") + for _, h := range hooks { + fmt.Fprintf(tw, "%d\t%s\t%s\t%s\t%t\t%s\t%s\t%s\t%s\t%s\n", + h.ID, hookVolumeLabel(h.VolumeID, volumes), h.Trigger, + triggeringRunLabel(h.TriggeringRunID), h.Changed, + formatStarted(h.StartedAtNs), + formatDuration(h.StartedAtNs, h.EndedAtNs), + h.Status, hookExitLabel(h.ExitCode), + truncateError(h.Error), + ) + } + return tw.Flush() +} + +// hookVolumeLabel resolves a hook run's volume_id to its name. Unlike the +// runs listing, hook_runs.volume_id is NOT NULL (a hook is always scoped +// to one volume), so the only fallback is the defensive "id=N" for a row +// whose volume vanished. +func hookVolumeLabel(volumeID int64, volumes map[int64]string) string { + if name, ok := volumes[volumeID]; ok { + return name + } + return fmt.Sprintf("id=%d", volumeID) +} + +// triggeringRunLabel renders the index run that fired an on-change hook. +// Interval hooks have no triggering run, so the column shows "—". +func triggeringRunLabel(runID sql.NullInt64) string { + if !runID.Valid { + return "—" + } + return fmt.Sprintf("%d", runID.Int64) +} + +// hookExitLabel renders the recorded exit code. A timeout or spawn failure +// produced no code, so the column shows "—" and the ERROR column carries +// the reason. +func hookExitLabel(exitCode sql.NullInt64) string { + if !exitCode.Valid { + return "—" + } + return fmt.Sprintf("%d", exitCode.Int64) +} diff --git a/cmd/squirrel/root.go b/cmd/squirrel/root.go index 9e00aa1..3be292f 100644 --- a/cmd/squirrel/root.go +++ b/cmd/squirrel/root.go @@ -44,6 +44,7 @@ func newRootCmd() *cobra.Command { root.AddCommand(newIndexCmd()) root.AddCommand(newQueryCmd()) root.AddCommand(newRunsCmd()) + root.AddCommand(newHooksCmd()) root.AddCommand(newVolumesCmd()) root.AddCommand(newSyncCmd()) root.AddCommand(newRestoreCmd()) diff --git a/config/config.go b/config/config.go index 3a13d62..e3d512d 100644 --- a/config/config.go +++ b/config/config.go @@ -77,8 +77,40 @@ type Volume struct { // must be strictly shorter — equal or longer adds nothing on top // of the pre-sync indexing the scheduler already runs. IndexEvery time.Duration + // Hook is the per-volume external-tool hook, or nil when the volume + // declares no `[volumes.X.hook]` block. The agent exec's its command + // (without a shell) on a trigger and records only the generic outcome + // — squirrel never learns what the command does. See VolumeHook. + Hook *VolumeHook } +// VolumeHook is a per-volume, best-effort command squirrel runs to nudge +// an external tool when content settles or on a cadence (#84). squirrel +// stays tool-agnostic: it exec's Command without a shell, passes context +// via SQUIRREL_* environment variables, and records only the exit +// code/timestamps. There is intentionally no rules engine — a single +// command, distinguished across triggers by the SQUIRREL_TRIGGER env var +// the agent sets, not by separate config. +type VolumeHook struct { + // Command is the argv exec'd without a shell — Command[0] is the + // program, the rest its arguments. Users wanting shell features write + // `sh -c '…'` themselves; squirrel never string-concatenates the + // volume path into a command line. + Command []string + // Timeout bounds one invocation so a hung hook can't wedge the agent's + // scheduler. Zero is replaced with DefaultHookTimeout at load time. + Timeout time.Duration +} + +// DefaultHookTimeout is the per-invocation timeout applied when a hook +// block omits `timeout`. Generous because the motivating consumer (a +// backup tool snapshotting a large volume) can legitimately run for a +// while; the bound exists to reap a truly wedged process, not to cap +// normal work. Overlap between invocations is handled separately by the +// agent's don't-stack guard, so a hook that outlives its own cadence is +// skipped rather than stacked. +const DefaultHookTimeout = time.Hour + // Destination is one rclone-backed remote. Type drives which Params are // required and how the destination is rendered into rclone.conf. type Destination struct { @@ -153,10 +185,16 @@ type rawConfig struct { } type rawVolume struct { - Path string `toml:"path"` - SyncTo []string `toml:"sync_to"` - SyncEvery string `toml:"sync_every"` - IndexEvery string `toml:"index_every"` + Path string `toml:"path"` + SyncTo []string `toml:"sync_to"` + SyncEvery string `toml:"sync_every"` + IndexEvery string `toml:"index_every"` + Hook *rawVolumeHook `toml:"hook"` +} + +type rawVolumeHook struct { + Command []string `toml:"command"` + Timeout string `toml:"timeout"` } func (r *rawConfig) resolve(path string) (*Config, error) { @@ -248,12 +286,51 @@ func resolveVolume(name string, raw rawVolume, dests map[string]*Destination, no if syncEvery > 0 && indexEvery > 0 && indexEvery >= syncEvery { return nil, errors.New("index_every must be strictly shorter than sync_every (pre-sync indexing already runs at sync_every cadence)") } + hook, err := resolveVolumeHook(raw.Hook) + if err != nil { + return nil, err + } return &Volume{ Name: name, Path: abs, SyncTo: raw.SyncTo, SyncEvery: syncEvery, IndexEvery: indexEvery, + Hook: hook, + }, nil +} + +// resolveVolumeHook validates an optional `[volumes.X.hook]` block. A nil +// raw (no block) yields a nil hook. When present, command is required and +// every argv element must be non-empty (an empty element is almost always +// a templating mistake that would exec the wrong program). timeout is +// optional; empty falls back to DefaultHookTimeout. +func resolveVolumeHook(raw *rawVolumeHook) (*VolumeHook, error) { + if raw == nil { + return nil, nil + } + if len(raw.Command) == 0 { + return nil, errors.New("hook.command is required and must have at least one element") + } + for i, arg := range raw.Command { + if arg == "" { + return nil, fmt.Errorf("hook.command[%d] is empty", i) + } + } + timeout := DefaultHookTimeout + if raw.Timeout != "" { + dur, err := time.ParseDuration(raw.Timeout) + if err != nil { + return nil, fmt.Errorf("hook.timeout %q: %w", raw.Timeout, err) + } + if dur <= 0 { + return nil, fmt.Errorf("hook.timeout must be a positive duration, got %s", dur) + } + timeout = dur + } + return &VolumeHook{ + Command: append([]string(nil), raw.Command...), + Timeout: timeout, }, nil } diff --git a/config/hook_test.go b/config/hook_test.go new file mode 100644 index 0000000..5c51d68 --- /dev/null +++ b/config/hook_test.go @@ -0,0 +1,109 @@ +package config + +import ( + "strings" + "testing" + "time" +) + +func TestLoadVolumeHook(t *testing.T) { + p := writeConfig(t, ` +[volumes.photos] +path = "/tmp/photos" + +[volumes.photos.hook] +command = ["kopia", "snapshot", "create", "."] +timeout = "30m" +`) + cfg, err := Load(p) + if err != nil { + t.Fatalf("Load: %v", err) + } + v := cfg.Volumes["photos"] + if v.Hook == nil { + t.Fatalf("Hook is nil, want resolved") + } + want := []string{"kopia", "snapshot", "create", "."} + if strings.Join(v.Hook.Command, "\x00") != strings.Join(want, "\x00") { + t.Fatalf("Command = %v, want %v", v.Hook.Command, want) + } + if v.Hook.Timeout != 30*time.Minute { + t.Fatalf("Timeout = %s, want 30m", v.Hook.Timeout) + } +} + +func TestLoadVolumeHookDefaultTimeout(t *testing.T) { + p := writeConfig(t, ` +[volumes.photos] +path = "/tmp/photos" + +[volumes.photos.hook] +command = ["backup.sh"] +`) + cfg, err := Load(p) + if err != nil { + t.Fatalf("Load: %v", err) + } + if got := cfg.Volumes["photos"].Hook.Timeout; got != DefaultHookTimeout { + t.Fatalf("Timeout = %s, want default %s", got, DefaultHookTimeout) + } +} + +func TestLoadVolumeHookNoBlock(t *testing.T) { + p := writeConfig(t, ` +[volumes.photos] +path = "/tmp/photos" +`) + cfg, err := Load(p) + if err != nil { + t.Fatalf("Load: %v", err) + } + if cfg.Volumes["photos"].Hook != nil { + t.Fatalf("Hook = %#v, want nil when no [hook] block", cfg.Volumes["photos"].Hook) + } +} + +func TestLoadVolumeHookErrors(t *testing.T) { + cases := []struct { + name string + body string + want string + }{ + { + name: "missing command", + body: "[volumes.v]\npath=\"/tmp/v\"\n[volumes.v.hook]\ntimeout=\"1m\"\n", + want: "hook.command is required", + }, + { + name: "empty command list", + body: "[volumes.v]\npath=\"/tmp/v\"\n[volumes.v.hook]\ncommand=[]\n", + want: "hook.command is required", + }, + { + name: "empty arg", + body: "[volumes.v]\npath=\"/tmp/v\"\n[volumes.v.hook]\ncommand=[\"kopia\", \"\"]\n", + want: "hook.command[1] is empty", + }, + { + name: "bad timeout", + body: "[volumes.v]\npath=\"/tmp/v\"\n[volumes.v.hook]\ncommand=[\"x\"]\ntimeout=\"nope\"\n", + want: "hook.timeout", + }, + { + name: "non-positive timeout", + body: "[volumes.v]\npath=\"/tmp/v\"\n[volumes.v.hook]\ncommand=[\"x\"]\ntimeout=\"0s\"\n", + want: "hook.timeout must be a positive duration", + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + _, err := Load(writeConfig(t, tc.body)) + if err == nil { + t.Fatalf("expected error containing %q, got nil", tc.want) + } + if !strings.Contains(err.Error(), tc.want) { + t.Fatalf("error = %v, want substring %q", err, tc.want) + } + }) + } +} diff --git a/hook/hook.go b/hook/hook.go new file mode 100644 index 0000000..92af6f8 --- /dev/null +++ b/hook/hook.go @@ -0,0 +1,230 @@ +// Package hook runs per-volume, best-effort external commands on behalf +// of the squirrel agent (#84). It is deliberately tool-agnostic: it exec's +// a user-configured command WITHOUT a shell, passes context through +// SQUIRREL_* environment variables, bounds the run with a timeout, and +// reports only the generic outcome (exit code, timestamps, a stderr tail). +// It never interprets what the command does — backup, verify, anything. +// +// The package is a pure library: it writes to no global stdout/stderr and +// surfaces everything via the returned Outcome. The caller (agent) decides +// how to record and log it. +package hook + +import ( + "bytes" + "context" + "errors" + "fmt" + "os" + "os/exec" + "strconv" + "time" +) + +// Trigger discriminates why a hook fired. It is passed to the command as +// SQUIRREL_TRIGGER so a single command can branch (e.g. back up on change, +// verify on interval) without squirrel modelling either concept. +type Trigger string + +const ( + TriggerChange Trigger = "change" + TriggerInterval Trigger = "interval" +) + +// Env var names forming the hook contract. Documented here so the one +// place that sets them and any future reader agree on the spelling. +const ( + EnvVolume = "SQUIRREL_VOLUME" + EnvPath = "SQUIRREL_PATH" + EnvRunID = "SQUIRREL_RUN_ID" + EnvChanged = "SQUIRREL_CHANGED" + EnvTrigger = "SQUIRREL_TRIGGER" +) + +// maxStderrCapture bounds how much of the command's stderr we keep for the +// recorded diagnostic. A chatty hook must not let squirrel buffer +// unbounded memory; the tail is all an operator needs to see why it failed. +const maxStderrCapture = 8 << 10 + +// waitDelay bounds the grace period cmd.Wait gives in-flight I/O after the +// process is killed (timeout or shutdown). Small: by the time we are +// killing, we only want to unblock — not to wait on a lingering grandchild. +const waitDelay = 2 * time.Second + +// Spec is one resolved hook invocation. Command is exec'd verbatim (no +// shell); the SQUIRREL_* values are derived from the remaining fields. +type Spec struct { + // Command is argv — Command[0] is the program. Must be non-empty. + Command []string + // Volume and Path populate SQUIRREL_VOLUME / SQUIRREL_PATH. + Volume string + Path string + // RunID populates SQUIRREL_RUN_ID — the run that triggered the hook. + // Zero renders as empty (interval hooks have no triggering run). + RunID int64 + // Changed populates SQUIRREL_CHANGED ("true"/"false"). + Changed bool + // Trigger populates SQUIRREL_TRIGGER. + Trigger Trigger + // Timeout bounds the invocation. Must be positive. + Timeout time.Duration +} + +// Outcome is the generic result of one invocation. ExitCode is meaningful +// only when HasExitCode is true (the process ran to completion and +// returned a code); a spawn failure or a timeout leaves it unset. Err is +// nil exactly when the command exited 0. +type Outcome struct { + StartedAtNs int64 + EndedAtNs int64 + ExitCode int + HasExitCode bool + TimedOut bool + // Stderr is a bounded tail of what the command wrote to stderr. + Stderr string + // Err is non-nil on spawn failure, timeout, or non-zero exit. It is a + // generic description — squirrel never parses the command's output. + Err error +} + +// Succeeded reports whether the command exited 0. +func (o Outcome) Succeeded() bool { return o.Err == nil } + +// Run exec's spec.Command without a shell, bounded by spec.Timeout (and by +// ctx — agent shutdown cancels in-flight hooks). It always returns a +// populated Outcome; a non-nil Outcome.Err describes the failure rather +// than being returned separately, because every caller wants to record the +// outcome regardless. ctx cancellation and the timeout both kill the +// process; the timeout is reported as TimedOut, a parent cancellation as a +// plain error. +func Run(ctx context.Context, spec Spec) Outcome { + out := Outcome{StartedAtNs: time.Now().UnixNano()} + if len(spec.Command) == 0 { + out.EndedAtNs = time.Now().UnixNano() + out.Err = errors.New("hook: empty command") + return out + } + // A non-positive timeout would make context.WithTimeout fire + // immediately, surfacing as a phantom "timed out" before the command + // ran. Callers are contractually required to pass a positive bound (the + // config layer defaults it), so treat a violation as a clear error + // rather than an instant, confusing timeout. + if spec.Timeout <= 0 { + out.EndedAtNs = time.Now().UnixNano() + out.Err = fmt.Errorf("hook: timeout must be positive, got %s", spec.Timeout) + return out + } + + runCtx, cancel := context.WithTimeout(ctx, spec.Timeout) + defer cancel() + + cmd := exec.CommandContext(runCtx, spec.Command[0], spec.Command[1:]...) + cmd.Env = append(os.Environ(), spec.env()...) + var stderr boundedBuffer + stderr.limit = maxStderrCapture + cmd.Stderr = &stderr + // The command's stdout is the consumer's business, not squirrel's — + // discard it so a chatty hook can't flood the agent's own streams. + cmd.Stdout = nil + // WaitDelay bounds how long Run blocks after the context is cancelled + // (timeout or shutdown). Without it, a hook that spawns a grandchild + // which inherits the stderr pipe would keep cmd.Wait blocked until that + // grandchild exits — exactly the "hung hook wedges the scheduler" case + // the contract forbids. After the delay the pipe is force-closed and + // Run returns; any orphaned grandchild is the consumer's problem, not + // squirrel's. + cmd.WaitDelay = waitDelay + + runErr := cmd.Run() + out.EndedAtNs = time.Now().UnixNano() + out.Stderr = stderr.String() + classify(&out, runCtx, ctx, spec.Timeout, runErr) + return out +} + +// classify folds the raw exec error into the Outcome's generic shape. The +// timeout check comes first because a deadline-killed process surfaces as +// an ExitError with code -1, which must not be mistaken for a real exit. +func classify(out *Outcome, runCtx, parentCtx context.Context, timeout time.Duration, runErr error) { + if runErr == nil { + out.ExitCode = 0 + out.HasExitCode = true + return + } + // Parent cancellation (agent shutdown) takes precedence over the + // timeout: the run was cut short by us, not by exceeding its budget. + if parentCtx.Err() != nil { + out.Err = fmt.Errorf("hook cancelled: %w", parentCtx.Err()) + return + } + if errors.Is(runCtx.Err(), context.DeadlineExceeded) { + out.TimedOut = true + out.Err = fmt.Errorf("hook timed out after %s", timeout) + return + } + var exitErr *exec.ExitError + if errors.As(runErr, &exitErr) { + out.ExitCode = exitErr.ExitCode() + out.HasExitCode = true + out.Err = fmt.Errorf("hook exited %d", out.ExitCode) + return + } + // Spawn failure: program not found, not executable, etc. No exit code. + out.Err = fmt.Errorf("hook failed to run: %w", runErr) +} + +// env renders the SQUIRREL_* contract for one invocation. SQUIRREL_RUN_ID +// is empty for interval hooks (RunID == 0) so the consumer can tell "fired +// by a run" from "fired by the clock" without inspecting the trigger. +func (s Spec) env() []string { + runID := "" + if s.RunID != 0 { + runID = strconv.FormatInt(s.RunID, 10) + } + return []string{ + EnvVolume + "=" + s.Volume, + EnvPath + "=" + s.Path, + EnvRunID + "=" + runID, + EnvChanged + "=" + strconv.FormatBool(s.Changed), + EnvTrigger + "=" + string(s.Trigger), + } +} + +// boundedBuffer is an io.Writer that keeps at most limit bytes — the LAST +// limit bytes written (the tail), dropping older overflow. For a failing +// command the most recent stderr is the part that explains the failure, so +// keeping the tail (not the head) is what makes the recorded diagnostic +// useful, while still bounding memory against a chatty hook. +type boundedBuffer struct { + buf bytes.Buffer + limit int +} + +func (b *boundedBuffer) Write(p []byte) (int, error) { + n := len(p) + if b.limit <= 0 { + return n, nil + } + // A single write larger than the limit: keep only its tail, discarding + // everything buffered before it (all of which is now older). + if len(p) >= b.limit { + b.buf.Reset() + b.buf.Write(p[len(p)-b.limit:]) + return n, nil + } + // Otherwise make room by dropping the oldest buffered bytes so the + // running total stays within the limit, then append. + if over := b.buf.Len() + len(p) - b.limit; over > 0 { + rest := b.buf.Bytes()[over:] + kept := make([]byte, len(rest)) + copy(kept, rest) + b.buf.Reset() + b.buf.Write(kept) + } + b.buf.Write(p) + // Report the full length as written: we intentionally discard older + // overflow rather than failing the command's write. + return n, nil +} + +func (b *boundedBuffer) String() string { return b.buf.String() } diff --git a/hook/hook_test.go b/hook/hook_test.go new file mode 100644 index 0000000..37a8c34 --- /dev/null +++ b/hook/hook_test.go @@ -0,0 +1,171 @@ +package hook + +import ( + "context" + "strings" + "testing" + "time" +) + +func TestRunSuccess(t *testing.T) { + out := Run(context.Background(), Spec{ + Command: []string{"sh", "-c", "exit 0"}, + Timeout: 5 * time.Second, + }) + if !out.Succeeded() { + t.Fatalf("Succeeded = false, err = %v", out.Err) + } + if !out.HasExitCode || out.ExitCode != 0 { + t.Fatalf("ExitCode = %d (has=%v), want 0", out.ExitCode, out.HasExitCode) + } + if out.EndedAtNs < out.StartedAtNs { + t.Fatalf("EndedAtNs %d < StartedAtNs %d", out.EndedAtNs, out.StartedAtNs) + } +} + +func TestRunNonZeroExit(t *testing.T) { + out := Run(context.Background(), Spec{ + Command: []string{"sh", "-c", "exit 7"}, + Timeout: 5 * time.Second, + }) + if out.Succeeded() { + t.Fatalf("Succeeded = true, want failure") + } + if !out.HasExitCode || out.ExitCode != 7 { + t.Fatalf("ExitCode = %d (has=%v), want 7", out.ExitCode, out.HasExitCode) + } + if out.TimedOut { + t.Fatalf("TimedOut = true, want false") + } +} + +func TestRunPassesEnv(t *testing.T) { + out := Run(context.Background(), Spec{ + Command: []string{"sh", "-c", `printf '%s|%s|%s|%s|%s' "$SQUIRREL_VOLUME" "$SQUIRREL_PATH" "$SQUIRREL_RUN_ID" "$SQUIRREL_CHANGED" "$SQUIRREL_TRIGGER" 1>&2`}, + Volume: "photos", + Path: "/mnt/hdd1/photos", + RunID: 42, + Changed: true, + Trigger: TriggerChange, + Timeout: 5 * time.Second, + }) + if !out.Succeeded() { + t.Fatalf("Succeeded = false, err = %v", out.Err) + } + want := "photos|/mnt/hdd1/photos|42|true|change" + if out.Stderr != want { + t.Fatalf("env = %q, want %q", out.Stderr, want) + } +} + +func TestRunIntervalRunIDEmpty(t *testing.T) { + out := Run(context.Background(), Spec{ + Command: []string{"sh", "-c", `printf '%s' "$SQUIRREL_RUN_ID" 1>&2`}, + RunID: 0, + Trigger: TriggerInterval, + Timeout: 5 * time.Second, + }) + if out.Stderr != "" { + t.Fatalf("SQUIRREL_RUN_ID = %q, want empty for interval hook", out.Stderr) + } +} + +func TestRunTimeout(t *testing.T) { + start := time.Now() + out := Run(context.Background(), Spec{ + Command: []string{"sh", "-c", "sleep 10"}, + Timeout: 50 * time.Millisecond, + }) + if elapsed := time.Since(start); elapsed > 5*time.Second { + t.Fatalf("Run took %s, timeout was not enforced", elapsed) + } + if !out.TimedOut { + t.Fatalf("TimedOut = false, want true") + } + if out.Succeeded() { + t.Fatalf("Succeeded = true, want failure on timeout") + } +} + +func TestRunSpawnFailure(t *testing.T) { + out := Run(context.Background(), Spec{ + Command: []string{"squirrel-no-such-binary-xyzzy"}, + Timeout: 5 * time.Second, + }) + if out.Succeeded() { + t.Fatalf("Succeeded = true, want failure") + } + if out.HasExitCode { + t.Fatalf("HasExitCode = true, want false for a process that never ran") + } + if out.TimedOut { + t.Fatalf("TimedOut = true, want false for spawn failure") + } +} + +func TestRunParentCancelled(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + cancel() + out := Run(ctx, Spec{ + Command: []string{"sh", "-c", "sleep 10"}, + Timeout: 5 * time.Second, + }) + if out.Succeeded() { + t.Fatalf("Succeeded = true, want failure on cancellation") + } + if out.TimedOut { + t.Fatalf("TimedOut = true, want a cancellation (not a timeout)") + } + if !strings.Contains(out.Err.Error(), "cancelled") { + t.Fatalf("Err = %v, want a cancellation message", out.Err) + } +} + +func TestRunEmptyCommand(t *testing.T) { + out := Run(context.Background(), Spec{Timeout: time.Second}) + if out.Succeeded() { + t.Fatalf("Succeeded = true, want failure for empty command") + } +} + +func TestRunNonPositiveTimeout(t *testing.T) { + out := Run(context.Background(), Spec{ + Command: []string{"sh", "-c", "exit 0"}, + Timeout: 0, + }) + if out.Succeeded() { + t.Fatalf("Succeeded = true, want failure for a non-positive timeout") + } + if out.TimedOut { + t.Fatalf("TimedOut = true, want a clear config error, not a phantom timeout") + } + if !strings.Contains(out.Err.Error(), "timeout must be positive") { + t.Fatalf("Err = %v, want a 'timeout must be positive' error", out.Err) + } +} + +func TestBoundedBufferKeepsTail(t *testing.T) { + // Single oversized write keeps the tail. + var b boundedBuffer + b.limit = 4 + n, err := b.Write([]byte("hello")) + if err != nil || n != 5 { + t.Fatalf("Write = (%d, %v), want (5, nil) — overflow is reported as written, not failed", n, err) + } + if b.String() != "ello" { + t.Fatalf("buffer = %q, want %q (the tail, not the head)", b.String(), "ello") + } + + // Tail is maintained across multiple writes. + var b2 boundedBuffer + b2.limit = 4 + if _, err := b2.Write([]byte("ab")); err != nil { + t.Fatalf("Write: %v", err) + } + if _, err := b2.Write([]byte("cdef")); err != nil { + t.Fatalf("Write: %v", err) + } + if b2.String() != "cdef" { + t.Fatalf("buffer = %q, want %q (last 4 bytes across writes)", b2.String(), "cdef") + } +} diff --git a/store/hookruns.go b/store/hookruns.go new file mode 100644 index 0000000..8ee2693 --- /dev/null +++ b/store/hookruns.go @@ -0,0 +1,175 @@ +package store + +import ( + "context" + "database/sql" + "fmt" +) + +// Hook triggers. A hook run is one exec of a user-configured per-volume +// command. 'change' fires after a successful index run that settled +// content; 'interval' fires on a cadence regardless of change. The +// discriminator is passed to the command as SQUIRREL_TRIGGER and recorded +// here so the two trigger histories stay distinguishable in the status +// surface. +const ( + HookTriggerChange = "change" + HookTriggerInterval = "interval" +) + +// Hook statuses. A hook run begins 'running' and FinishHookRun moves it to +// a terminal state. squirrel records pass/fail only — it never interprets +// what the command did, so there is no 'partial': a timeout or a non-zero +// exit are both 'failed'. The generic exit code carries the nuance. +const ( + HookStatusRunning = "running" + HookStatusSuccess = "success" + HookStatusFailed = "failed" +) + +// HookRun is one row in the hook_runs table — the generic outcome of one +// hook invocation. ExitCode is NULL when the process never produced one +// (spawn failure or timeout); Error is NULL on success and carries a +// short diagnostic (stderr tail, "timeout", spawn error) otherwise. +// TriggeringRunID references the index run that fired an on-change hook +// and is NULL for interval hooks (no run triggered them). Changed mirrors +// the SQUIRREL_CHANGED value the hook was passed. +type HookRun struct { + ID int64 + VolumeID int64 + Trigger string + TriggeringRunID sql.NullInt64 + Changed bool + StartedAtNs int64 + EndedAtNs sql.NullInt64 + Status string + ExitCode sql.NullInt64 + Error sql.NullString +} + +// HookRunSpec is the immutable context BeginHookRun records when a hook +// invocation starts. TriggeringRunID is set for on-change hooks and left +// zero (NULL) for interval hooks. +type HookRunSpec struct { + VolumeID int64 + Trigger string + TriggeringRunID int64 + Changed bool +} + +// hookRunColumns is the fixed projection for every read of a hook_runs +// row, kept in lockstep with scanHookRun's scan order. +const hookRunColumns = `id, volume_id, trigger, triggering_run_id, changed, started_at_ns, ended_at_ns, status, exit_code, error` + +func scanHookRun(scan func(...any) error) (HookRun, error) { + var r HookRun + err := scan(&r.ID, &r.VolumeID, &r.Trigger, &r.TriggeringRunID, &r.Changed, + &r.StartedAtNs, &r.EndedAtNs, &r.Status, &r.ExitCode, &r.Error) + return r, err +} + +func scanHookRunRow(s rowScanner) (HookRun, error) { + return scanHookRun(s.Scan) +} + +// BeginHookRun records the start of a hook invocation and returns its id. +// The trigger must be one of the HookTrigger* constants. The row begins +// in status 'running'; FinishHookRun moves it to a terminal state. +// +// The trigger and triggering run must agree (the schema enforces the same +// coupling): a 'change' hook is fired by an index run and must carry a +// non-zero TriggeringRunID; an 'interval' hook fires on the clock and must +// leave it zero (stored as NULL). Rejecting the mismatch here turns a +// wiring bug into a clear error rather than a CHECK-constraint failure. +func (s *Store) BeginHookRun(ctx context.Context, spec HookRunSpec) (int64, error) { + switch spec.Trigger { + case HookTriggerChange: + if spec.TriggeringRunID == 0 { + return 0, fmt.Errorf("BeginHookRun: a %q hook requires a triggering run id", HookTriggerChange) + } + case HookTriggerInterval: + if spec.TriggeringRunID != 0 { + return 0, fmt.Errorf("BeginHookRun: an %q hook must not carry a triggering run id (got %d)", HookTriggerInterval, spec.TriggeringRunID) + } + default: + return 0, fmt.Errorf("BeginHookRun: trigger must be %q or %q, got %q", HookTriggerChange, HookTriggerInterval, spec.Trigger) + } + var triggeringRun sql.NullInt64 + if spec.TriggeringRunID != 0 { + triggeringRun = sql.NullInt64{Int64: spec.TriggeringRunID, Valid: true} + } + res, err := s.db.ExecContext(ctx, ` + INSERT INTO hook_runs (volume_id, trigger, triggering_run_id, changed, started_at_ns, status) + VALUES (?, ?, ?, ?, ?, 'running') + `, spec.VolumeID, spec.Trigger, triggeringRun, spec.Changed, NowNs()) + if err != nil { + return 0, fmt.Errorf("insert hook run: %w", err) + } + id, err := res.LastInsertId() + if err != nil { + return 0, fmt.Errorf("hook run last insert id: %w", err) + } + return id, nil +} + +// FinishHookRun records the terminal state of a hook run. exitCode is +// stored as-is (pass an invalid sql.NullInt64 when the process produced +// no code, e.g. spawn failure or timeout); errMsg is stored as NULL when +// empty. Returns an error if id matches no row so a hook is never left +// stuck in 'running'. +func (s *Store) FinishHookRun(ctx context.Context, id int64, status string, exitCode sql.NullInt64, errMsg string) error { + if status != HookStatusSuccess && status != HookStatusFailed { + return fmt.Errorf("FinishHookRun: status must be %q or %q, got %q", HookStatusSuccess, HookStatusFailed, status) + } + var errVal sql.NullString + if errMsg != "" { + errVal = sql.NullString{String: errMsg, Valid: true} + } + res, err := s.db.ExecContext(ctx, ` + UPDATE hook_runs SET ended_at_ns = ?, status = ?, exit_code = ?, error = ? + WHERE id = ? + `, NowNs(), status, exitCode, errVal, id) + if err != nil { + return fmt.Errorf("finish hook run %d: %w", id, err) + } + n, err := res.RowsAffected() + if err != nil { + return fmt.Errorf("finish hook run %d rows affected: %w", id, err) + } + if n == 0 { + return fmt.Errorf("finish hook run %d: no such hook run", id) + } + return nil +} + +// HookRunListOpts filters and shapes ListHookRuns. The zero value returns +// every hook run, oldest first, with no cap. +type HookRunListOpts struct { + // VolumeID, when non-nil, restricts results to hooks for that volume. + VolumeID *int64 + // Limit caps the result count. Zero (or negative) means no cap. + Limit int + // Descending sorts by id descending (most recent first). + Descending bool +} + +// ListHookRuns returns hook runs matching opts. See HookRunListOpts for +// filter and ordering semantics. +func (s *Store) ListHookRuns(ctx context.Context, opts HookRunListOpts) ([]HookRun, error) { + query := `SELECT ` + hookRunColumns + ` FROM hook_runs` + var args []any + if opts.VolumeID != nil { + query += ` WHERE volume_id = ?` + args = append(args, *opts.VolumeID) + } + if opts.Descending { + query += ` ORDER BY id DESC` + } else { + query += ` ORDER BY id` + } + if opts.Limit > 0 { + query += ` LIMIT ?` + args = append(args, opts.Limit) + } + return queryRows(ctx, s.db, query, scanHookRunRow, args...) +} diff --git a/store/hookruns_test.go b/store/hookruns_test.go new file mode 100644 index 0000000..b0b6dcb --- /dev/null +++ b/store/hookruns_test.go @@ -0,0 +1,186 @@ +package store + +import ( + "context" + "database/sql" + "errors" + "testing" +) + +func TestHookRunLifecycle(t *testing.T) { + s := openTestStore(t) + ctx := context.Background() + vol, err := s.CreateVolume(ctx, "photos", "/tmp/photos") + if err != nil { + t.Fatalf("CreateVolume: %v", err) + } + // A change hook references the index run that fired it. + runID, err := s.BeginIndexRun(ctx, RunKindIndex, vol.ID, true) + if err != nil { + t.Fatalf("BeginIndexRun: %v", err) + } + + id, err := s.BeginHookRun(ctx, HookRunSpec{ + VolumeID: vol.ID, + Trigger: HookTriggerChange, + TriggeringRunID: runID, + Changed: true, + }) + if err != nil { + t.Fatalf("BeginHookRun: %v", err) + } + + got, err := s.hookRunByID(ctx, id) + if err != nil { + t.Fatalf("read back: %v", err) + } + if got.Status != HookStatusRunning { + t.Fatalf("status = %q, want running", got.Status) + } + if !got.TriggeringRunID.Valid || got.TriggeringRunID.Int64 != runID { + t.Fatalf("TriggeringRunID = %v, want %d", got.TriggeringRunID, runID) + } + if !got.Changed { + t.Fatalf("Changed = false, want true") + } + if got.EndedAtNs.Valid { + t.Fatalf("EndedAtNs set before finish") + } + + exit := sql.NullInt64{Int64: 0, Valid: true} + if err := s.FinishHookRun(ctx, id, HookStatusSuccess, exit, ""); err != nil { + t.Fatalf("FinishHookRun: %v", err) + } + got, err = s.hookRunByID(ctx, id) + if err != nil { + t.Fatalf("read back: %v", err) + } + if got.Status != HookStatusSuccess { + t.Fatalf("status = %q, want success", got.Status) + } + if !got.ExitCode.Valid || got.ExitCode.Int64 != 0 { + t.Fatalf("ExitCode = %v, want 0", got.ExitCode) + } + if !got.EndedAtNs.Valid { + t.Fatalf("EndedAtNs not set after finish") + } +} + +func TestBeginHookRunIntervalNoTriggeringRun(t *testing.T) { + s := openTestStore(t) + ctx := context.Background() + vol, err := s.CreateVolume(ctx, "v", "/tmp/v") + if err != nil { + t.Fatalf("CreateVolume: %v", err) + } + id, err := s.BeginHookRun(ctx, HookRunSpec{VolumeID: vol.ID, Trigger: HookTriggerInterval}) + if err != nil { + t.Fatalf("BeginHookRun: %v", err) + } + got, err := s.hookRunByID(ctx, id) + if err != nil { + t.Fatalf("read back: %v", err) + } + if got.TriggeringRunID.Valid { + t.Fatalf("TriggeringRunID = %v, want NULL for interval hook", got.TriggeringRunID) + } +} + +func TestBeginHookRunRejectsBadTrigger(t *testing.T) { + s := openTestStore(t) + ctx := context.Background() + vol, _ := s.CreateVolume(ctx, "v", "/tmp/v") + if _, err := s.BeginHookRun(ctx, HookRunSpec{VolumeID: vol.ID, Trigger: "bogus"}); err == nil { + t.Fatalf("expected error for bad trigger, got nil") + } +} + +func TestFinishHookRunRejectsBadStatus(t *testing.T) { + s := openTestStore(t) + ctx := context.Background() + vol, _ := s.CreateVolume(ctx, "v", "/tmp/v") + // Interval trigger needs no triggering run, keeping this focused on the + // FinishHookRun status validation. + id, err := s.BeginHookRun(ctx, HookRunSpec{VolumeID: vol.ID, Trigger: HookTriggerInterval}) + if err != nil { + t.Fatalf("BeginHookRun: %v", err) + } + if err := s.FinishHookRun(ctx, id, HookStatusRunning, sql.NullInt64{}, ""); err == nil { + t.Fatalf("expected error finishing with non-terminal status, got nil") + } +} + +func TestBeginHookRunRejectsTriggerRunMismatch(t *testing.T) { + s := openTestStore(t) + ctx := context.Background() + vol, _ := s.CreateVolume(ctx, "v", "/tmp/v") + // change without a triggering run is rejected. + if _, err := s.BeginHookRun(ctx, HookRunSpec{VolumeID: vol.ID, Trigger: HookTriggerChange}); err == nil { + t.Fatalf("expected error: change hook with no triggering run") + } + // interval with a triggering run is rejected. + if _, err := s.BeginHookRun(ctx, HookRunSpec{VolumeID: vol.ID, Trigger: HookTriggerInterval, TriggeringRunID: 1}); err == nil { + t.Fatalf("expected error: interval hook carrying a triggering run") + } +} + +func TestFinishHookRunUnknownID(t *testing.T) { + s := openTestStore(t) + if err := s.FinishHookRun(context.Background(), 9999, HookStatusFailed, sql.NullInt64{}, "x"); err == nil { + t.Fatalf("expected error for unknown hook run id, got nil") + } +} + +func TestListHookRuns(t *testing.T) { + s := openTestStore(t) + ctx := context.Background() + va, _ := s.CreateVolume(ctx, "a", "/tmp/a") + vb, _ := s.CreateVolume(ctx, "b", "/tmp/b") + // The change hook needs a real triggering run (the column is an FK and + // the trigger↔run coupling is enforced). + runID, err := s.BeginIndexRun(ctx, RunKindIndex, va.ID, true) + if err != nil { + t.Fatalf("BeginIndexRun: %v", err) + } + id1, _ := s.BeginHookRun(ctx, HookRunSpec{VolumeID: va.ID, Trigger: HookTriggerChange, TriggeringRunID: runID}) + id2, _ := s.BeginHookRun(ctx, HookRunSpec{VolumeID: vb.ID, Trigger: HookTriggerInterval}) + id3, _ := s.BeginHookRun(ctx, HookRunSpec{VolumeID: va.ID, Trigger: HookTriggerInterval}) + + all, err := s.ListHookRuns(ctx, HookRunListOpts{Descending: true}) + if err != nil { + t.Fatalf("ListHookRuns: %v", err) + } + if len(all) != 3 { + t.Fatalf("len = %d, want 3", len(all)) + } + if all[0].ID != id3 || all[2].ID != id1 { + t.Fatalf("descending order wrong: got %d..%d", all[0].ID, all[2].ID) + } + + onlyA, err := s.ListHookRuns(ctx, HookRunListOpts{VolumeID: &va.ID}) + if err != nil { + t.Fatalf("ListHookRuns volume filter: %v", err) + } + if len(onlyA) != 2 { + t.Fatalf("volume-filtered len = %d, want 2", len(onlyA)) + } + for _, h := range onlyA { + if h.VolumeID != va.ID { + t.Fatalf("volume filter leaked volume %d", h.VolumeID) + } + } + _ = id2 +} + +// hookRunByID reads a single hook run by id. It lives in the test +// file (not the package surface) because production code never fetches a +// hook run by primary key — it lists or, in the follow-up interval work, +// reads the latest per (volume, trigger). +func (s *Store) hookRunByID(ctx context.Context, id int64) (HookRun, error) { + row := s.db.QueryRowContext(ctx, `SELECT `+hookRunColumns+` FROM hook_runs WHERE id = ?`, id) + r, err := scanHookRun(row.Scan) + if errors.Is(err, sql.ErrNoRows) { + return HookRun{}, err + } + return r, err +} diff --git a/store/migrations.go b/store/migrations.go index 1c3bb8b..63163b0 100644 --- a/store/migrations.go +++ b/store/migrations.go @@ -10,7 +10,7 @@ import ( ) // SchemaVersion is the schema version this binary writes and reads. -const SchemaVersion = 12 +const SchemaVersion = 13 // freshSchemaBaseline is the version applied to a brand-new database. The // chain in `migrations` continues from here. v1 is no longer reachable from @@ -48,6 +48,7 @@ func buildMigrations(mctx migrationCtx) []migration { {version: 10, up: migrateV9ToV10}, {version: 11, up: migrateV10ToV11}, {version: 12, up: migrateV11ToV12}, + {version: 13, up: migrateV12ToV13}, } } @@ -1315,3 +1316,61 @@ func migrateV11ToV12(ctx context.Context, db *sql.DB) error { } return tx.Commit() } + +// migrateV12ToV13 adds the hook_runs table: the generic outcome record +// for the per-volume external-tool hooks (#84). A hook run is squirrel +// exec'ing a user-configured command on one of two triggers — 'change' +// (after a successful index run that settled content) or 'interval' (on +// a cadence, regardless of change) — and recording only the tool-agnostic +// result (exit code, timestamps, the triggering run). squirrel never +// parses what the command did, so the table carries no tool-specific +// columns. +// +// The table is additive and references existing parents (volumes, runs); +// no existing row is rewritten, so the content-immutability invariant on +// `files` is untouched. triggering_run_id is a nullable FK to runs(id): +// on-change hooks carry the index run that fired them; interval hooks +// leave it NULL because no run triggered them. +func migrateV12ToV13(ctx context.Context, db *sql.DB) error { + tx, err := db.BeginTx(ctx, nil) + if err != nil { + return err + } + defer tx.Rollback() + + stmts := []string{ + // The trigger↔triggering_run_id coupling mirrors the runs table's + // kind↔destination CHECK: a 'change' hook is always fired by an index + // run (so it carries that run id), while an 'interval' hook fires on + // the clock with no run behind it (so the id is NULL). Encoding it in + // the schema keeps the RUN column in `squirrel hooks`/TUI honest and + // turns a wiring bug into a loud failure instead of a NULL. + `CREATE TABLE hook_runs ( + id INTEGER PRIMARY KEY, + volume_id INTEGER NOT NULL REFERENCES volumes(id), + trigger TEXT NOT NULL CHECK (trigger IN ('change','interval')), + triggering_run_id INTEGER REFERENCES runs(id), + changed INTEGER NOT NULL CHECK (changed IN (0, 1)), + started_at_ns INTEGER NOT NULL, + ended_at_ns INTEGER, + status TEXT NOT NULL CHECK (status IN ('running','success','failed')), + exit_code INTEGER, + error TEXT, + CHECK ( + (trigger = 'change' AND triggering_run_id IS NOT NULL) OR + (trigger = 'interval' AND triggering_run_id IS NULL) + ) + )`, + // The cadence math (interval-hook due check) and the status surface + // both read the latest hook run per (volume, trigger); index the + // pair the queries order by. + `CREATE INDEX idx_hook_runs_volume_trigger ON hook_runs(volume_id, trigger, started_at_ns)`, + `INSERT INTO schema_version (version) VALUES (13)`, + } + for _, q := range stmts { + if _, err := tx.ExecContext(ctx, q); err != nil { + return fmt.Errorf("v12→v13: %w", err) + } + } + return tx.Commit() +} diff --git a/tui/hooks.go b/tui/hooks.go new file mode 100644 index 0000000..8fde03e --- /dev/null +++ b/tui/hooks.go @@ -0,0 +1,193 @@ +package tui + +import ( + "context" + "fmt" + "time" + + "github.com/charmbracelet/bubbles/table" + tea "github.com/charmbracelet/bubbletea" + "github.com/charmbracelet/lipgloss" + + "github.com/mbertschler/squirrel/store" +) + +// hooksModel lists recent external-tool hook runs (#84): the generic +// outcome squirrel records each time it nudged a per-volume command on a +// change or interval trigger. It is a read-only table mirroring the +// Volumes screen's shape — squirrel records pass/fail and an exit code +// only, never what the command did, so there is nothing to drill into. +type hooksModel struct { + store *store.Store + + width, height int + + table table.Model + + rows []store.HookRun + volumesByID map[int64]store.Volume + loaded bool + loadErr error +} + +type hooksDataMsg struct { + hooks []store.HookRun + volumes []store.Volume + err error +} + +func newHooksModel(s *store.Store) *hooksModel { + t := table.New(table.WithFocused(true), table.WithHeight(15)) + style := table.DefaultStyles() + style.Header = style.Header. + BorderStyle(lipgloss.NormalBorder()). + BorderForeground(colourMuted). + BorderBottom(true). + Bold(true) + style.Selected = style.Selected. + Foreground(lipgloss.Color("231")). + Background(colourAccent). + Bold(false) + t.SetStyles(style) + return &hooksModel{store: s, table: t} +} + +func (m *hooksModel) Init() tea.Cmd { return m.fetch() } + +func (m *hooksModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) { + switch msg := msg.(type) { + case tea.WindowSizeMsg: + m.width, m.height = msg.Width, msg.Height + h := msg.Height - 7 + if h < 5 { + h = 5 + } + m.table.SetHeight(h) + m.resizeColumns() + return m, nil + case tickMsg: + return m, m.fetch() + case hooksDataMsg: + m.loaded = true + m.loadErr = msg.err + if msg.err == nil { + m.rows = msg.hooks + m.volumesByID = make(map[int64]store.Volume, len(msg.volumes)) + for _, v := range msg.volumes { + m.volumesByID[v.ID] = v + } + m.refreshRows() + } + return m, nil + } + var cmd tea.Cmd + m.table, cmd = m.table.Update(msg) + return m, cmd +} + +func (m *hooksModel) View() string { + if !m.loaded && m.loadErr == nil { + return styleMuted.Render("loading hooks…") + } + if m.loadErr != nil { + return styleErr.Render(fmt.Sprintf("hooks error: %v", m.loadErr)) + } + if len(m.rows) == 0 { + return styleHeader.Render("Hooks") + "\n" + + styleMuted.Render("no hook runs yet — configure [volumes..hook] and run the agent") + } + header := styleHeader.Render(fmt.Sprintf("Hooks (%d)", len(m.rows))) + return header + "\n" + m.table.View() +} + +func (m *hooksModel) refreshRows() { + now := time.Now() + rows := make([]table.Row, 0, len(m.rows)) + for _, r := range m.rows { + rows = append(rows, table.Row{ + m.volumeName(r.VolumeID), + r.Trigger, + whenAgo(r.EndedAtNs, now), + hookRunDuration(r), + lipgloss.NewStyle().Foreground(statusColour(r.Status)).Render(glyphForStatus(r.Status) + " " + r.Status), + hookExitCode(r), + hookChanged(r), + }) + } + m.table.SetRows(rows) + m.resizeColumns() +} + +func (m *hooksModel) resizeColumns() { + cols := []table.Column{ + {Title: "VOLUME", Width: 16}, + {Title: "TRIGGER", Width: 10}, + {Title: "WHEN", Width: 12}, + {Title: "DURATION", Width: 10}, + {Title: "STATUS", Width: 11}, + {Title: "EXIT", Width: 6}, + {Title: "CHANGED", Width: 8}, + } + if m.width > 0 { + fixed := 0 + for _, c := range cols { + fixed += c.Width + } + fixed += 2 * (len(cols) - 1) + surplus := m.width - fixed - 4 + if surplus > 0 { + cols[0].Width += surplus + } + } + m.table.SetColumns(cols) +} + +func (m *hooksModel) volumeName(id int64) string { + if v, ok := m.volumesByID[id]; ok { + return v.Name + } + return fmt.Sprintf("vol#%d", id) +} + +func (m *hooksModel) fetch() tea.Cmd { + return func() tea.Msg { + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + hooks, err := m.store.ListHookRuns(ctx, store.HookRunListOpts{Limit: 500, Descending: true}) + if err != nil { + return hooksDataMsg{err: err} + } + vols, err := m.store.ListVolumes(ctx) + if err != nil { + return hooksDataMsg{err: err} + } + return hooksDataMsg{hooks: hooks, volumes: vols} + } +} + +// hookRunDuration renders a hook run's elapsed time, "—" while still +// running. +func hookRunDuration(r store.HookRun) string { + if !r.EndedAtNs.Valid { + return styleMuted.Render("—") + } + return humanDuration(time.Duration(r.EndedAtNs.Int64 - r.StartedAtNs)) +} + +// hookExitCode renders the recorded exit code, "—" when none was produced +// (a timeout or spawn failure leaves it NULL). +func hookExitCode(r store.HookRun) string { + if !r.ExitCode.Valid { + return styleMuted.Render("—") + } + return fmt.Sprintf("%d", r.ExitCode.Int64) +} + +// hookChanged renders the SQUIRREL_CHANGED value the hook was passed, so a +// "fired but nothing moved" no-op is distinguishable at a glance. +func hookChanged(r store.HookRun) string { + if r.Changed { + return "yes" + } + return styleMuted.Render("no") +} diff --git a/tui/tui.go b/tui/tui.go index 0e7f9f1..b11d895 100644 --- a/tui/tui.go +++ b/tui/tui.go @@ -39,6 +39,7 @@ const ( screenDashboard screen = iota screenRuns screenVolumes + screenHooks screenBrowse ) @@ -83,6 +84,7 @@ type rootModel struct { dashboard *dashboardModel runs *runsModel volumes *volumesModel + hooks *hooksModel browse *browseModel } @@ -95,6 +97,7 @@ func newRootModel(s *store.Store, cfg *config.Config) *rootModel { dashboard: dash, runs: newRunsModel(s), volumes: newVolumesModel(s), + hooks: newHooksModel(s), browse: newBrowseModel(s), } } @@ -118,6 +121,7 @@ func (m *rootModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) { cmds = append(cmds, forwardSize(m.dashboard, msg)) cmds = append(cmds, forwardSize(m.runs, msg)) cmds = append(cmds, forwardSize(m.volumes, msg)) + cmds = append(cmds, forwardSize(m.hooks, msg)) cmds = append(cmds, forwardSize(m.browse, msg)) return m, tea.Batch(cmds...) @@ -174,6 +178,8 @@ func (m *rootModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) { return m, m.switchTo(screenRuns) case "3": return m, m.switchTo(screenVolumes) + case "4": + return m, m.switchTo(screenHooks) case "esc": if m.active == screenBrowse { return m, m.switchTo(screenVolumes) @@ -231,7 +237,7 @@ func (m *rootModel) modalConsumesKey(key string) bool { // Browse is excluded because it is reached from Volumes, not from the // tab bar. func (m *rootModel) nextTab(delta int) screen { - tabs := []screen{screenDashboard, screenRuns, screenVolumes} + tabs := []screen{screenDashboard, screenRuns, screenVolumes, screenHooks} idx := 0 for i, t := range tabs { if t == m.active { @@ -266,6 +272,8 @@ func (m *rootModel) activeScreen() tea.Model { return m.runs case screenVolumes: return m.volumes + case screenHooks: + return m.hooks case screenBrowse: return m.browse } @@ -281,6 +289,7 @@ func (m *rootModel) renderTabs() string { {screenDashboard, "Dashboard", "1"}, {screenRuns, "Runs", "2"}, {screenVolumes, "Volumes", "3"}, + {screenHooks, "Hooks", "4"}, } var rendered []string for _, l := range labels { @@ -297,7 +306,7 @@ func (m *rootModel) renderTabs() string { } func (m *rootModel) renderStatusBar() string { - left := "q quit · tab / shift-tab switch · 1-3 jump" + left := "q quit · tab / shift-tab switch · 1-4 jump" if m.active == screenBrowse { left = "↑↓ navigate · enter descend · backspace ascend · esc / q back" }