Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,28 @@ Supported destination types: `local`, `sftp`, `s3`, `b2`, `gcs`. Secrets accept

Squirrel writes its own `rclone.conf` next to the config (`~/.squirrel/rclone.conf`, mode 0600) on every sync invocation. You do not run `rclone config` and you should not edit `rclone.conf` by hand.

### Hooks

A volume can declare a per-volume **hook** — a command the agent runs to nudge an external tool when the volume's content changes. squirrel stays tool-agnostic: it never learns what the command does (a backup with kopia/restic, an `rclone copy`, a shell script — all the same to squirrel). It exec's the command **without a shell**, passes context through environment variables, and records only the generic outcome (exit code, timestamps).

```toml
[volumes.pictures.hook]
command = ["kopia", "snapshot", "create", "."]
timeout = "30m" # optional, defaults to 1h
```

The hook fires after a successful index run on the volume (which the agent runs on the `index_every` / `sync_every` cadence). It is **best-effort**: a hook failure or timeout never fails or blocks the run that triggered it, and overlapping invocations for the same volume are skipped rather than stacked. The command receives:

| Variable | Meaning |
|---|---|
| `SQUIRREL_VOLUME` | volume name |
| `SQUIRREL_PATH` | absolute volume path |
| `SQUIRREL_RUN_ID` | the index run that triggered the hook |
| `SQUIRREL_CHANGED` | `true`/`false` — whether the run observed changes (so the command can cheaply no-op) |
| `SQUIRREL_TRIGGER` | what fired the hook — currently always `change` |

Because the command is exec'd without a shell, the volume path is never string-concatenated into a command line. If you want shell features, make the command `["sh", "-c", "…"]` yourself. Recorded outcomes are visible via `squirrel hooks` and the TUI's Hooks tab.

## Quickstart

Index a configured volume:
Expand Down
186 changes: 186 additions & 0 deletions agent/hooks.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
package agent

import (
"context"
"database/sql"
"fmt"
"log/slog"
"strings"
"sync"
"time"

"github.com/mbertschler/squirrel/config"
"github.com/mbertschler/squirrel/hook"
"github.com/mbertschler/squirrel/store"
)

// hookRunner owns the lifecycle of per-volume external-tool hooks (#84):
// the don't-stack guard, the spawn/bound/reap goroutine, and the generic
// outcome recording. One per scheduler. All firing is best-effort — every
// failure is logged via the agent logger and never propagated, so a hook
// can neither fail nor block the run that triggered it.
//
// Hooks run in their own goroutine (not on the scheduler tick) so a
// long-running or wedged command can't stall cadence evaluation; the tick
// only ever pays for the synchronous BeginHookRun insert. wait() lets the
// scheduler drain in-flight hooks on shutdown.
type hookRunner struct {
	// store persists hook runs: fire inserts the row via BeginHookRun and
	// execute closes it via FinishHookRun.
	store *store.Store
	// logger receives every hook.* log line (kicked, skipped, finished,
	// error).
	logger *slog.Logger

	// mu guards running; tryStart and done are the only accessors.
	mu      sync.Mutex
	running map[int64]struct{} // volume ids with an in-flight invocation
	// wg counts hook goroutines launched by fire so wait can block until
	// they have all returned.
	wg sync.WaitGroup
}

// newHookRunner returns a hookRunner that records hook runs in s and logs
// through logger. The in-flight set starts out empty, so the first fire
// for any volume is free to start.
func newHookRunner(s *store.Store, logger *slog.Logger) *hookRunner {
	r := new(hookRunner)
	r.store = s
	r.logger = logger
	r.running = map[int64]struct{}{}
	return r
}

// fire starts the hook configured on vol, unless there is nothing to do
// or an invocation for volumeID is still in flight. It does not wait for
// the command: after the synchronous BeginHookRun insert it hands the
// work to a goroutine tracked by h.wg and returns.
//
// Parameters:
//   - ctx: used for the BeginHookRun insert and passed on to the command
//     run (agent shutdown cancels it).
//   - vol / volumeID: the volume whose hook should run and its store id.
//   - trigger: what fired the hook. Only "change" is used until the
//     interval caller lands in #86; it stays a parameter so this code is
//     trigger-agnostic, which is why unparam is silenced below.
//   - triggeringRunID: the index run that fired an on-change hook (zero
//     for interval hooks).
//   - changed: the value forwarded as SQUIRREL_CHANGED.
//
// Nothing is returned: every failure is logged and swallowed so the
// triggering run is never affected. A nil receiver is a no-op, which lets
// tests build a bare scheduler without a runner.
func (h *hookRunner) fire(ctx context.Context, vol *config.Volume, volumeID int64, trigger string, triggeringRunID int64, changed bool) { //nolint:unparam
	if h == nil {
		return
	}
	if vol.Hook == nil {
		// No hook declared for this volume.
		return
	}

	if claimed := h.tryStart(volumeID); !claimed {
		// An earlier invocation for this volume has not finished yet.
		// Skipping (rather than queueing) is the specified behaviour:
		// a later trigger, or the external tool's own schedule, will
		// pick up whatever this one would have covered.
		h.logger.Info("hook.skipped",
			"volume", vol.Name,
			"trigger", trigger,
			"reason", "previous invocation still running")
		return
	}

	spec := store.HookRunSpec{
		VolumeID:        volumeID,
		Trigger:         trigger,
		TriggeringRunID: triggeringRunID,
		Changed:         changed,
	}
	hookRunID, beginErr := h.store.BeginHookRun(ctx, spec)
	if beginErr != nil {
		h.logger.Error("hook.error",
			"volume", vol.Name,
			"trigger", trigger,
			"err", fmt.Sprintf("begin hook run: %v", beginErr))
		// Release the slot claimed by tryStart; no goroutine will do it.
		h.done(volumeID)
		return
	}

	h.logger.Info("hook.kicked",
		"volume", vol.Name,
		"trigger", trigger,
		"hook_run_id", hookRunID,
		"run_id", triggeringRunID,
		"changed", changed)

	// Add before spawning so wait() cannot miss this goroutine.
	h.wg.Add(1)
	run := func() {
		// Deferred calls run last-in-first-out: the volume slot is
		// released first, then the WaitGroup is decremented.
		defer h.wg.Done()
		defer h.done(volumeID)
		h.execute(ctx, vol, hookRunID, trigger, triggeringRunID, changed)
	}
	go run()
}

// execute is the body of the hook goroutine: it runs the volume's command
// through hook.Run, maps the outcome with classifyOutcome, stores it with
// FinishHookRun and emits the terminal log lines.
//
// The store write deliberately does not use ctx. ctx may already be
// cancelled by agent shutdown (which is what stopped the command), so the
// outcome is written under a fresh five-second context instead. A failed
// write is logged and otherwise ignored.
func (h *hookRunner) execute(ctx context.Context, vol *config.Volume, hookRunID int64, trigger string, triggeringRunID int64, changed bool) {
	const finishTimeout = 5 * time.Second

	spec := hook.Spec{
		Command: vol.Hook.Command,
		Volume:  vol.Name,
		Path:    vol.Path,
		RunID:   triggeringRunID,
		Changed: changed,
		Trigger: hook.Trigger(trigger),
		Timeout: vol.Hook.Timeout,
	}
	outcome := hook.Run(ctx, spec)
	status, exitCode, errMsg := classifyOutcome(outcome)

	// Detached from ctx on purpose; see the function comment.
	finishCtx, cancel := context.WithTimeout(context.Background(), finishTimeout)
	defer cancel()

	finishErr := h.store.FinishHookRun(finishCtx, hookRunID, status, exitCode, errMsg)
	if finishErr != nil {
		h.logger.Error("hook.error",
			"volume", vol.Name,
			"trigger", trigger,
			"hook_run_id", hookRunID,
			"err", fmt.Sprintf("finish hook run: %v", finishErr))
	}

	h.logFinished(vol.Name, trigger, hookRunID, outcome, status)
}

// logFinished writes the closing log lines for one hook invocation. An
// info-level "hook.finished" line is always emitted with the status and
// the duration in milliseconds; exit_code and timed_out are attached only
// when the outcome carries them. For an outcome that did not succeed, an
// error-level "hook.error" line follows with the error text, so someone
// tailing the agent log sees the failure without querying the hook_runs
// table.
func (h *hookRunner) logFinished(volume, trigger string, hookRunID int64, outcome hook.Outcome, status string) {
	elapsed := time.Duration(outcome.EndedAtNs - outcome.StartedAtNs)

	// Ten fixed entries plus up to two optional key/value pairs.
	fields := make([]any, 0, 14)
	fields = append(fields,
		"volume", volume,
		"trigger", trigger,
		"hook_run_id", hookRunID,
		"status", status,
		"duration_ms", elapsed.Milliseconds(),
	)
	if outcome.HasExitCode {
		fields = append(fields, "exit_code", outcome.ExitCode)
	}
	if outcome.TimedOut {
		fields = append(fields, "timed_out", true)
	}
	h.logger.Info("hook.finished", fields...)

	if outcome.Succeeded() {
		return
	}
	// NOTE(review): assumes a non-succeeded outcome always has a non-nil
	// Err — confirm against hook.Outcome.
	h.logger.Error("hook.error",
		"volume", volume,
		"trigger", trigger,
		"hook_run_id", hookRunID,
		"err", outcome.Err.Error())
}

// classifyOutcome translates a hook.Outcome into the three generic values
// stored for a hook run.
//
// Returns:
//   - status: store.HookStatusSuccess when the outcome succeeded,
//     store.HookStatusFailed otherwise.
//   - exitCode: valid only when the outcome has an exit code (it is kept
//     for failures too); left NULL otherwise, e.g. for a timeout or a
//     spawn failure.
//   - errMsg: empty on success; on failure the error text, followed by
//     ": " and the whitespace-trimmed stderr when that is non-empty, so
//     the stored row explains the failure by itself.
func classifyOutcome(outcome hook.Outcome) (status string, exitCode sql.NullInt64, errMsg string) {
	if outcome.HasExitCode {
		exitCode.Int64 = int64(outcome.ExitCode)
		exitCode.Valid = true
	}

	if !outcome.Succeeded() {
		// NOTE(review): assumes a non-succeeded outcome always has a
		// non-nil Err — confirm against hook.Outcome.
		errMsg = outcome.Err.Error()
		if stderr := strings.TrimSpace(outcome.Stderr); len(stderr) > 0 {
			errMsg += ": " + stderr
		}
		return store.HookStatusFailed, exitCode, errMsg
	}

	return store.HookStatusSuccess, exitCode, ""
}

// tryStart claims the in-flight slot for volumeID. It returns true when
// the slot was free and is now held by the caller, and false when an
// invocation for that volume is already recorded as running. The check
// and the claim happen under h.mu, so two callers cannot both win.
func (h *hookRunner) tryStart(volumeID int64) bool {
	h.mu.Lock()
	defer h.mu.Unlock()

	_, busy := h.running[volumeID]
	if !busy {
		h.running[volumeID] = struct{}{}
	}
	return !busy
}

// done releases the in-flight slot for volumeID so a later tryStart for
// the same volume can succeed. Releasing a slot that is not held is
// harmless: deleting a missing map key does nothing.
func (h *hookRunner) done(volumeID int64) {
	h.mu.Lock()
	defer h.mu.Unlock()
	delete(h.running, volumeID)
}

// wait returns once every hook goroutine started by fire has finished.
// The scheduler calls it on shutdown; hooks are timeout-bounded and
// cancelling ctx kills the command, so the wait is expected to be short.
// A nil receiver returns immediately.
func (h *hookRunner) wait() {
	if h != nil {
		h.wg.Wait()
	}
}
Loading
Loading