diff --git a/AGENTS.md b/AGENTS.md index e378c0d..a707ea6 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -16,6 +16,22 @@ and operator-driven. Any new feature (sync, prune, dedup, GC) must preserve both: no deleting or overwriting history without an explicit, opt-in retention policy. +# Schema & migrations + +Real databases migrate through the forward-only Go registry in +`store/migrations.go` (a fresh DB applies the v5 baseline, then steps to +`SchemaVersion`). That chain is the source of truth — there are no `.sql` +migration files. + +`store/schema.sql` is a generated, flattened snapshot of the schema at +`SchemaVersion`, for humans and agents who want the current shape without +reading the whole migration chain. It does **not** bootstrap any database. +After changing the schema (adding a migration), regenerate it with +`go test ./store -update-schema`; the `TestSchemaSnapshot` golden test fails +on drift, so CI catches a stale snapshot. `squirrel db schema` prints the DDL +of a database directly (opening it runs migrations first), for inspecting a +real index without a repo checkout. + # Code quality Don't: diff --git a/cmd/squirrel/db.go b/cmd/squirrel/db.go index b7a9c59..8e9ac4e 100644 --- a/cmd/squirrel/db.go +++ b/cmd/squirrel/db.go @@ -14,13 +14,15 @@ import ( "github.com/mbertschler/squirrel/store" ) -// newDBCmd returns the `squirrel db` parent command. Subcommands cover -// the SQLite-side hygiene primitives that issue #65 wanted shipped as -// a coherent cluster: online backup via VACUUM INTO, integrity check -// via PRAGMA integrity_check, and snapshot-restore for rolling back to -// a known-good copy. The migration runner inside store.OpenWithOptions -// also calls Backup automatically before any schema-advancing -// migration; this command group is for the operator-facing surface. +// newDBCmd returns the `squirrel db` parent command. Most subcommands are +// the SQLite-side hygiene primitives that issue #65 wanted shipped as a +// coherent cluster: online backup via VACUUM INTO, integrity check via +// PRAGMA integrity_check, and snapshot-restore for rolling back to a +// known-good copy. `schema` joins them as an inspection primitive — it +// prints the live database's DDL. The migration runner inside +// store.OpenWithOptions also calls Backup automatically before any +// schema-advancing migration; this command group is for the +// operator-facing surface. func newDBCmd() *cobra.Command { cmd := &cobra.Command{ Use: "db", @@ -29,6 +31,7 @@ func newDBCmd() *cobra.Command { cmd.AddCommand(newDBBackupCmd()) cmd.AddCommand(newDBCheckCmd()) cmd.AddCommand(newDBRestoreCmd()) + cmd.AddCommand(newDBSchemaCmd()) return cmd } diff --git a/cmd/squirrel/db_schema.go b/cmd/squirrel/db_schema.go new file mode 100644 index 0000000..ee4e05b --- /dev/null +++ b/cmd/squirrel/db_schema.go @@ -0,0 +1,41 @@ +package main + +import ( + "fmt" + + "github.com/spf13/cobra" +) + +// newDBSchemaCmd returns `squirrel db schema`, which prints the DDL +// (tables, indexes, triggers) of whichever database the usual --db/config +// resolution opens, as a flattened script. Opening runs the normal +// migration chain first, so the output reflects the objects actually +// materialised in that file at the binary's SchemaVersion — useful for an +// operator or agent to inspect a real index directly, without a repo +// checkout of store/schema.sql. +func newDBSchemaCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "schema", + Short: "Print the index database's DDL (tables, indexes, triggers)", + RunE: func(cmd *cobra.Command, args []string) error { + return runDBSchema(cmd) + }, + } + return cmd +} + +func runDBSchema(cmd *cobra.Command) error { + cfg, _ := tryLoadConfig(cmd) // cfg may be nil; openStore handles that. + s, err := openStore(cmd, cfg) + if err != nil { + return err + } + defer s.Close() + + ddl, err := s.DumpSchema(cmd.Context()) + if err != nil { + return err + } + fmt.Fprint(cmd.OutOrStdout(), ddl) + return nil +} diff --git a/cmd/squirrel/db_test.go b/cmd/squirrel/db_test.go index f75a1fd..df2d363 100644 --- a/cmd/squirrel/db_test.go +++ b/cmd/squirrel/db_test.go @@ -107,6 +107,24 @@ func TestCLIDBRestoreRejectsSchemaMismatch(t *testing.T) { } } +// TestCLIDBSchemaPrintsDDL confirms `squirrel db schema` dumps the +// opened database's DDL, including the invariants the schema enforces: +// the foundational volumes table, the blake3-immutability trigger, and +// the one-live-row-per-path partial unique index. +func TestCLIDBSchemaPrintsDDL(t *testing.T) { + f := writeSyncFixture(t) + out := runCLI(t, "--config", f.configPath, "db", "schema") + for _, want := range []string{ + "CREATE TABLE volumes", + "CREATE TRIGGER files_blake3_immutable", + "uniq_files_live_per_path", + } { + if !strings.Contains(out, want) { + t.Fatalf("db schema output missing %q:\n%s", want, out) + } + } +} + func itoa(i int) string { if i == 0 { return "0" diff --git a/store/schema.go b/store/schema.go new file mode 100644 index 0000000..6a5b523 --- /dev/null +++ b/store/schema.go @@ -0,0 +1,91 @@ +package store + +import ( + "context" + "database/sql" + "fmt" + "strings" +) + +// schemaSnapshotNodeName is the self-node identity seeded into the +// throwaway in-memory database canonicalSchemaSQL migrates. It never +// reaches the dumped DDL (the nodes row is data, not schema), so any +// nodeNameRE-valid string works; a fixed one keeps generation +// deterministic. +const schemaSnapshotNodeName = "schema-snapshot" + +// schemaSnapshotHeader prefixes the generated store/schema.sql. The %d is +// filled with SchemaVersion. It states plainly that the file is generated +// and how to refresh it, so a human or agent who opens it isn't tempted to +// hand-edit a file the golden test will overwrite. +const schemaSnapshotHeader = "-- Generated by `go test ./store -update-schema` — DO NOT EDIT.\n" + + "--\n" + + "-- Flattened snapshot of the squirrel index schema at version %d, for humans\n" + + "-- and agents who want the current shape without replaying the migration\n" + + "-- chain in migrations.go. It is NOT used to create or migrate databases —\n" + + "-- a fresh DB is built by applyV5 plus the migration registry. The golden\n" + + "-- test TestSchemaSnapshot fails if this file drifts from that chain.\n" + +// DumpSchema returns the DDL currently materialised in the database as a +// deterministic SQL script: every CREATE statement recorded in +// sqlite_master (tables, indexes, triggers), grouped by table and ordered +// so the output is stable across runs. Indexes SQLite generates implicitly +// for PRIMARY KEY / UNIQUE constraints are omitted — they carry a NULL sql +// and are already implied by their table's CREATE. The CLI's `db schema` +// subcommand prints this for whichever database it opens. +func (s *Store) DumpSchema(ctx context.Context) (string, error) { + return dumpSchema(ctx, s.db) +} + +func dumpSchema(ctx context.Context, db *sql.DB) (string, error) { + rows, err := db.QueryContext(ctx, ` + SELECT sql FROM sqlite_master + WHERE sql IS NOT NULL AND name NOT LIKE 'sqlite_%' + ORDER BY tbl_name, + CASE type WHEN 'table' THEN 0 WHEN 'index' THEN 1 WHEN 'trigger' THEN 2 ELSE 3 END, + name`) + if err != nil { + return "", fmt.Errorf("read sqlite_master: %w", err) + } + defer rows.Close() + + var b strings.Builder + for rows.Next() { + var stmt string + if err := rows.Scan(&stmt); err != nil { + return "", fmt.Errorf("scan schema row: %w", err) + } + b.WriteString(strings.TrimSpace(stmt)) + b.WriteString(";\n\n") + } + if err := rows.Err(); err != nil { + return "", fmt.Errorf("iterate schema rows: %w", err) + } + return b.String(), nil +} + +// canonicalSchemaSQL migrates a throwaway in-memory database to +// SchemaVersion through the same chain Open uses, then returns the snapshot +// (header + DDL) that store/schema.sql must match. It is the single source +// of truth shared by the golden test and its -update-schema rewrite path. +// +// The in-memory DB is opened via buildDSN so it carries the same pragmas +// (foreign_keys, _txlock, …) a production Open applies — the snapshot is +// generated under the same constraints real databases migrate under. +func canonicalSchemaSQL(ctx context.Context) (string, error) { + db, err := openSQLite(buildDSN(":memory:")) + if err != nil { + return "", err + } + defer db.Close() + + s := &Store{db: db} // empty path → migrate skips the pre-migration backup + if err := s.migrate(ctx, schemaSnapshotNodeName, OpenOptions{DisablePreMigrationBackup: true}); err != nil { + return "", fmt.Errorf("migrate in-memory schema: %w", err) + } + body, err := s.DumpSchema(ctx) + if err != nil { + return "", err + } + return fmt.Sprintf(schemaSnapshotHeader, SchemaVersion) + "\n" + body, nil +} diff --git a/store/schema.sql b/store/schema.sql new file mode 100644 index 0000000..1424807 --- /dev/null +++ b/store/schema.sql @@ -0,0 +1,137 @@ +-- Generated by `go test ./store -update-schema` — DO NOT EDIT. +-- +-- Flattened snapshot of the squirrel index schema at version 13, for humans +-- and agents who want the current shape without replaying the migration +-- chain in migrations.go. It is NOT used to create or migrate databases — +-- a fresh DB is built by applyV5 plus the migration registry. The golden +-- test TestSchemaSnapshot fails if this file drifts from that chain. + +CREATE TABLE "files" ( + folder_id INTEGER NOT NULL REFERENCES folders(id), + name TEXT NOT NULL, + blake3 BLOB NOT NULL CHECK (length(blake3) = 32), + size_bytes INTEGER NOT NULL, + mtime_ns INTEGER NOT NULL, + status TEXT NOT NULL CHECK (status IN ('present','missing','superseded')), + first_seen_run_id INTEGER NOT NULL REFERENCES runs(id), + last_seen_run_id INTEGER NOT NULL REFERENCES runs(id), + indexed_at_ns INTEGER NOT NULL, + source_node_id INTEGER REFERENCES nodes(id), + source_run_id INTEGER REFERENCES runs(id), + PRIMARY KEY (folder_id, name, blake3) + ); + +CREATE INDEX idx_files_blake3 ON files(blake3, folder_id, name); + +CREATE INDEX idx_files_missing ON files(folder_id, name) WHERE status = 'missing'; + +CREATE INDEX idx_files_source_node ON files(source_node_id) + WHERE status = 'present' AND source_node_id IS NOT NULL; + +CREATE UNIQUE INDEX uniq_files_live_per_path ON files(folder_id, name) WHERE status != 'superseded'; + +CREATE TRIGGER files_blake3_immutable BEFORE UPDATE OF blake3 ON files + BEGIN + SELECT RAISE(ABORT, 'blake3 is immutable; supersede the row and insert a new one'); + END; + +CREATE TABLE folders ( + id INTEGER PRIMARY KEY, + volume_id INTEGER NOT NULL REFERENCES volumes(id), + parent_id INTEGER REFERENCES folders(id), + path TEXT NOT NULL, + shallow_blake3 BLOB CHECK (shallow_blake3 IS NULL OR length(shallow_blake3) = 32), + deep_blake3 BLOB CHECK (deep_blake3 IS NULL OR length(deep_blake3) = 32), + last_changed_run_id INTEGER REFERENCES runs(id), file_count INTEGER NOT NULL DEFAULT 0, cumulative_size INTEGER NOT NULL DEFAULT 0, + UNIQUE (volume_id, path) + ); + +CREATE INDEX idx_folders_parent ON folders(parent_id); + +CREATE TABLE hook_runs ( + id INTEGER PRIMARY KEY, + volume_id INTEGER NOT NULL REFERENCES volumes(id), + trigger TEXT NOT NULL CHECK (trigger IN ('change','interval')), + triggering_run_id INTEGER REFERENCES runs(id), + changed INTEGER NOT NULL CHECK (changed IN (0, 1)), + started_at_ns INTEGER NOT NULL, + ended_at_ns INTEGER, + status TEXT NOT NULL CHECK (status IN ('running','success','failed')), + exit_code INTEGER, + error TEXT, + CHECK ( + (trigger = 'change' AND triggering_run_id IS NOT NULL) OR + (trigger = 'interval' AND triggering_run_id IS NULL) + ) + ); + +CREATE INDEX idx_hook_runs_volume_trigger ON hook_runs(volume_id, trigger, started_at_ns); + +CREATE TABLE nodes ( + id INTEGER PRIMARY KEY, + name TEXT NOT NULL UNIQUE, + endpoint TEXT, + public_key_fingerprint TEXT + ); + +CREATE TABLE peer_sync_state ( + volume_id INTEGER NOT NULL REFERENCES volumes(id), + peer_node_id INTEGER NOT NULL REFERENCES nodes(id), + last_shared_run_id INTEGER, + last_synced_at INTEGER NOT NULL, + PRIMARY KEY (volume_id, peer_node_id) + ); + +CREATE TABLE peer_sync_state_history ( + id INTEGER PRIMARY KEY, + volume_id INTEGER NOT NULL REFERENCES volumes(id), + peer_node_id INTEGER NOT NULL REFERENCES nodes(id), + last_shared_run_id INTEGER, + last_synced_at INTEGER NOT NULL, + at_ns INTEGER NOT NULL + ); + +CREATE INDEX idx_peer_sync_history_pair + ON peer_sync_state_history(volume_id, peer_node_id); + +CREATE TABLE "runs" ( + id INTEGER PRIMARY KEY, + kind TEXT NOT NULL CHECK (kind IN ('index','sync','restore','audit')), + volume_id INTEGER REFERENCES volumes(id), + destination TEXT, + started_at_ns INTEGER NOT NULL, + ended_at_ns INTEGER, + status TEXT NOT NULL CHECK (status IN ('running','success','failed','partial')), + error TEXT, + file_count INTEGER NOT NULL DEFAULT 0, + peer_node_id INTEGER REFERENCES nodes(id), + correlated_run_id INTEGER, shallow INTEGER CHECK (shallow IS NULL OR shallow IN (0, 1)), + CHECK ( + (kind IN ('index','audit') AND destination IS NULL) OR + (kind IN ('sync','restore') AND destination IS NOT NULL AND destination != '') + ) + ); + +CREATE INDEX idx_runs_destination ON runs(destination) WHERE destination IS NOT NULL; + +CREATE INDEX idx_runs_volume_started ON runs(volume_id, started_at_ns); + +CREATE TABLE runs_audit ( + id INTEGER PRIMARY KEY, + run_id INTEGER NOT NULL REFERENCES runs(id), + transition TEXT NOT NULL, + operator TEXT, + at_ns INTEGER NOT NULL, + note TEXT + ); + +CREATE INDEX idx_runs_audit_run ON runs_audit(run_id); + +CREATE TABLE schema_version (version INTEGER NOT NULL PRIMARY KEY); + +CREATE TABLE volumes ( + id INTEGER PRIMARY KEY, + name TEXT NOT NULL UNIQUE, + path TEXT NOT NULL + ); + diff --git a/store/schema_test.go b/store/schema_test.go new file mode 100644 index 0000000..d2d5ebf --- /dev/null +++ b/store/schema_test.go @@ -0,0 +1,44 @@ +package store + +import ( + "context" + "flag" + "os" + "testing" +) + +// updateSchema, set via `go test ./store -update-schema`, rewrites the +// checked-in snapshot from the live migration chain instead of comparing +// against it. Running it is the sanctioned way to refresh schema.sql after +// adding a migration. +var updateSchema = flag.Bool("update-schema", false, "rewrite store/schema.sql from the current migration chain") + +const schemaSnapshotPath = "schema.sql" + +// TestSchemaSnapshot guards store/schema.sql against drift: the checked-in +// snapshot must equal the DDL produced by migrating a fresh database to +// SchemaVersion. A migration that changes the shape without regenerating +// the snapshot fails here, which keeps the human/agent-readable schema +// honest without anyone having to remember to refresh it. +func TestSchemaSnapshot(t *testing.T) { + want, err := canonicalSchemaSQL(context.Background()) + if err != nil { + t.Fatalf("generate canonical schema: %v", err) + } + + if *updateSchema { + if err := os.WriteFile(schemaSnapshotPath, []byte(want), 0o644); err != nil { + t.Fatalf("write %s: %v", schemaSnapshotPath, err) + } + t.Logf("wrote %s", schemaSnapshotPath) + return + } + + got, err := os.ReadFile(schemaSnapshotPath) + if err != nil { + t.Fatalf("read %s (run `go test ./store -update-schema` to create it): %v", schemaSnapshotPath, err) + } + if string(got) != want { + t.Errorf("%s is stale — run `go test ./store -update-schema` to regenerate it", schemaSnapshotPath) + } +}