Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,22 @@ and operator-driven.
Any new feature (sync, prune, dedup, GC) must preserve both: no deleting or
overwriting history without an explicit, opt-in retention policy.

# Schema & migrations

Real databases migrate through the forward-only Go registry in
`store/migrations.go` (a fresh DB applies the v5 baseline, then steps to
`SchemaVersion`). That chain is the source of truth — there are no `.sql`
migration files.

`store/schema.sql` is a generated, flattened snapshot of the schema at
`SchemaVersion`, for humans and agents who want the current shape without
reading the whole migration chain. It does **not** bootstrap any database.
After changing the schema (adding a migration), regenerate it with
`go test ./store -update-schema`; the `TestSchemaSnapshot` golden test fails
on drift, so CI catches a stale snapshot. `squirrel db schema` prints the DDL
of a database directly (opening it runs migrations first), for inspecting a
real index without a repo checkout.

# Code quality

Don't:
Expand Down
17 changes: 10 additions & 7 deletions cmd/squirrel/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,15 @@ import (
"github.com/mbertschler/squirrel/store"
)

// newDBCmd returns the `squirrel db` parent command. Subcommands cover
// the SQLite-side hygiene primitives that issue #65 wanted shipped as
// a coherent cluster: online backup via VACUUM INTO, integrity check
// via PRAGMA integrity_check, and snapshot-restore for rolling back to
// a known-good copy. The migration runner inside store.OpenWithOptions
// also calls Backup automatically before any schema-advancing
// migration; this command group is for the operator-facing surface.
// newDBCmd returns the `squirrel db` parent command. Most subcommands are
// the SQLite-side hygiene primitives that issue #65 wanted shipped as a
// coherent cluster: online backup via VACUUM INTO, integrity check via
// PRAGMA integrity_check, and snapshot-restore for rolling back to a
// known-good copy. `schema` joins them as an inspection primitive — it
// prints the live database's DDL. The migration runner inside
// store.OpenWithOptions also calls Backup automatically before any
// schema-advancing migration; this command group is for the
// operator-facing surface.
func newDBCmd() *cobra.Command {
cmd := &cobra.Command{
Use: "db",
Expand All @@ -29,6 +31,7 @@ func newDBCmd() *cobra.Command {
cmd.AddCommand(newDBBackupCmd())
cmd.AddCommand(newDBCheckCmd())
cmd.AddCommand(newDBRestoreCmd())
cmd.AddCommand(newDBSchemaCmd())
return cmd
}

Expand Down
41 changes: 41 additions & 0 deletions cmd/squirrel/db_schema.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package main

import (
"fmt"

"github.com/spf13/cobra"
)

// newDBSchemaCmd builds the `squirrel db schema` subcommand. The command
// opens whichever database the usual --db/config resolution selects and
// prints its DDL (tables, indexes, triggers) as one flattened script.
// Because opening a database runs the normal migration chain first, the
// output shows the objects materialised in that file at the binary's
// SchemaVersion — handy for an operator or agent inspecting a real index
// without a repo checkout of store/schema.sql.
func newDBSchemaCmd() *cobra.Command {
	return &cobra.Command{
		Use:   "schema",
		Short: "Print the index database's DDL (tables, indexes, triggers)",
		// Positional arguments are not used; all the work happens in
		// runDBSchema.
		RunE: func(c *cobra.Command, _ []string) error {
			return runDBSchema(c)
		},
	}
}

// runDBSchema opens the store for cmd, dumps its DDL via Store.DumpSchema
// and writes the script to the command's output stream.
//
// It returns the error from opening the store, from reading the schema, or
// from writing the output; nil means the whole dump was written.
func runDBSchema(cmd *cobra.Command) error {
	cfg, _ := tryLoadConfig(cmd) // cfg may be nil; openStore handles that.
	s, err := openStore(cmd, cfg)
	if err != nil {
		return err
	}
	defer s.Close()

	ddl, err := s.DumpSchema(cmd.Context())
	if err != nil {
		return err
	}
	// Surface write failures (closed pipe, full disk behind a redirect)
	// instead of reporting success for a dump that never fully arrived.
	if _, err := fmt.Fprint(cmd.OutOrStdout(), ddl); err != nil {
		return fmt.Errorf("write schema: %w", err)
	}
	return nil
}
18 changes: 18 additions & 0 deletions cmd/squirrel/db_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,24 @@ func TestCLIDBRestoreRejectsSchemaMismatch(t *testing.T) {
}
}

// TestCLIDBSchemaPrintsDDL runs `squirrel db schema` against a fixture
// config and checks that the printed DDL contains markers for the
// invariants the schema enforces: the foundational volumes table, the
// blake3-immutability trigger, and the one-live-row-per-path partial
// unique index.
func TestCLIDBSchemaPrintsDDL(t *testing.T) {
	fixture := writeSyncFixture(t)
	ddl := runCLI(t, "--config", fixture.configPath, "db", "schema")

	markers := [...]string{
		"CREATE TABLE volumes",
		"CREATE TRIGGER files_blake3_immutable",
		"uniq_files_live_per_path",
	}
	for i := range markers {
		if strings.Contains(ddl, markers[i]) {
			continue
		}
		// Stop at the first missing marker and show the full output.
		t.Fatalf("db schema output missing %q:\n%s", markers[i], ddl)
	}
}

func itoa(i int) string {
if i == 0 {
return "0"
Expand Down
91 changes: 91 additions & 0 deletions store/schema.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
package store

import (
"context"
"database/sql"
"fmt"
"strings"
)

// Constants used when generating the store/schema.sql snapshot.
const (
	// schemaSnapshotNodeName is the self-node identity handed to migrate
	// for the throwaway in-memory database built by canonicalSchemaSQL.
	// The nodes row is data rather than schema, so the name never shows
	// up in the dumped DDL; any nodeNameRE-valid string would do, and a
	// fixed one keeps generation deterministic.
	schemaSnapshotNodeName = "schema-snapshot"

	// schemaSnapshotHeader is the comment banner placed at the top of the
	// generated store/schema.sql. Its single %d verb is filled with
	// SchemaVersion. The banner says outright that the file is generated
	// and how to refresh it, so nobody hand-edits a file that the
	// -update-schema path will overwrite.
	schemaSnapshotHeader = "-- Generated by `go test ./store -update-schema` — DO NOT EDIT.\n" +
		"--\n" +
		"-- Flattened snapshot of the squirrel index schema at version %d, for humans\n" +
		"-- and agents who want the current shape without replaying the migration\n" +
		"-- chain in migrations.go. It is NOT used to create or migrate databases —\n" +
		"-- a fresh DB is built by applyV5 plus the migration registry. The golden\n" +
		"-- test TestSchemaSnapshot fails if this file drifts from that chain.\n"
)

// DumpSchema renders the schema currently materialised in the store's
// database as a deterministic SQL script. The script contains every CREATE
// statement recorded in sqlite_master (tables, indexes, triggers), grouped
// by owning table and sorted so repeated runs produce identical output.
// Indexes that SQLite creates implicitly for PRIMARY KEY / UNIQUE
// constraints carry a NULL sql column and are left out; their table's
// CREATE statement already implies them. The CLI's `db schema` subcommand
// prints this script for whichever database it opens.
func (s *Store) DumpSchema(ctx context.Context) (string, error) {
	script, err := dumpSchema(ctx, s.db)
	return script, err
}

// dumpSchema reads the CREATE statements stored in db's sqlite_master and
// joins them into one script. Each statement is whitespace-trimmed and
// followed by ";" and a blank line.
//
// Rows are ordered by owning table (tbl_name), then tables before indexes
// before triggers before anything else, then by object name, so the output
// is stable across runs. Rows with a NULL sql column and objects under
// SQLite's reserved "sqlite_" prefix are skipped.
//
// It returns a wrapped error if the query, a row scan, or row iteration
// fails; the returned string is empty in that case.
func dumpSchema(ctx context.Context, db *sql.DB) (string, error) {
	// In LIKE, '_' matches any single character, so an unescaped
	// 'sqlite_%' would also drop user objects such as "sqlitecache". The
	// ESCAPE clause restricts the filter to the literal "sqlite_" prefix.
	rows, err := db.QueryContext(ctx, `
SELECT sql FROM sqlite_master
WHERE sql IS NOT NULL AND name NOT LIKE 'sqlite\_%' ESCAPE '\'
ORDER BY tbl_name,
CASE type WHEN 'table' THEN 0 WHEN 'index' THEN 1 WHEN 'trigger' THEN 2 ELSE 3 END,
name`)
	if err != nil {
		return "", fmt.Errorf("read sqlite_master: %w", err)
	}
	defer rows.Close()

	var b strings.Builder
	for rows.Next() {
		var stmt string
		if err := rows.Scan(&stmt); err != nil {
			return "", fmt.Errorf("scan schema row: %w", err)
		}
		b.WriteString(strings.TrimSpace(stmt))
		b.WriteString(";\n\n")
	}
	// rows.Next returning false can mean an error rather than end of data.
	if err := rows.Err(); err != nil {
		return "", fmt.Errorf("iterate schema rows: %w", err)
	}
	return b.String(), nil
}

// canonicalSchemaSQL produces the exact text store/schema.sql must contain:
// the generated-file header followed by the DDL of a throwaway in-memory
// database migrated to SchemaVersion via the Store's migrate method. Both
// the golden test and its -update-schema rewrite path call it, so there is
// a single definition of "the current schema".
//
// The in-memory database is opened with a DSN from buildDSN so that it
// carries the pragmas buildDSN configures (foreign_keys, _txlock, …),
// keeping snapshot generation under the same constraints as a regular
// database.
func canonicalSchemaSQL(ctx context.Context) (string, error) {
	memDB, err := openSQLite(buildDSN(":memory:"))
	if err != nil {
		return "", err
	}
	defer memDB.Close()

	// The Store has no file path, which makes migrate skip the
	// pre-migration backup; the option switches it off explicitly too.
	scratch := &Store{db: memDB}
	opts := OpenOptions{DisablePreMigrationBackup: true}
	if err = scratch.migrate(ctx, schemaSnapshotNodeName, opts); err != nil {
		return "", fmt.Errorf("migrate in-memory schema: %w", err)
	}

	ddl, err := scratch.DumpSchema(ctx)
	if err != nil {
		return "", err
	}
	header := fmt.Sprintf(schemaSnapshotHeader, SchemaVersion)
	return header + "\n" + ddl, nil
}
137 changes: 137 additions & 0 deletions store/schema.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
-- Generated by `go test ./store -update-schema` — DO NOT EDIT.
--
-- Flattened snapshot of the squirrel index schema at version 13, for humans
-- and agents who want the current shape without replaying the migration
-- chain in migrations.go. It is NOT used to create or migrate databases —
-- a fresh DB is built by applyV5 plus the migration registry. The golden
-- test TestSchemaSnapshot fails if this file drifts from that chain.

CREATE TABLE "files" (
folder_id INTEGER NOT NULL REFERENCES folders(id),
name TEXT NOT NULL,
blake3 BLOB NOT NULL CHECK (length(blake3) = 32),
size_bytes INTEGER NOT NULL,
mtime_ns INTEGER NOT NULL,
status TEXT NOT NULL CHECK (status IN ('present','missing','superseded')),
first_seen_run_id INTEGER NOT NULL REFERENCES runs(id),
last_seen_run_id INTEGER NOT NULL REFERENCES runs(id),
indexed_at_ns INTEGER NOT NULL,
source_node_id INTEGER REFERENCES nodes(id),
source_run_id INTEGER REFERENCES runs(id),
PRIMARY KEY (folder_id, name, blake3)
);

CREATE INDEX idx_files_blake3 ON files(blake3, folder_id, name);

CREATE INDEX idx_files_missing ON files(folder_id, name) WHERE status = 'missing';

CREATE INDEX idx_files_source_node ON files(source_node_id)
WHERE status = 'present' AND source_node_id IS NOT NULL;

CREATE UNIQUE INDEX uniq_files_live_per_path ON files(folder_id, name) WHERE status != 'superseded';

CREATE TRIGGER files_blake3_immutable BEFORE UPDATE OF blake3 ON files
BEGIN
SELECT RAISE(ABORT, 'blake3 is immutable; supersede the row and insert a new one');
END;

CREATE TABLE folders (
id INTEGER PRIMARY KEY,
volume_id INTEGER NOT NULL REFERENCES volumes(id),
parent_id INTEGER REFERENCES folders(id),
path TEXT NOT NULL,
shallow_blake3 BLOB CHECK (shallow_blake3 IS NULL OR length(shallow_blake3) = 32),
deep_blake3 BLOB CHECK (deep_blake3 IS NULL OR length(deep_blake3) = 32),
last_changed_run_id INTEGER REFERENCES runs(id), file_count INTEGER NOT NULL DEFAULT 0, cumulative_size INTEGER NOT NULL DEFAULT 0,
UNIQUE (volume_id, path)
);

CREATE INDEX idx_folders_parent ON folders(parent_id);

CREATE TABLE hook_runs (
id INTEGER PRIMARY KEY,
volume_id INTEGER NOT NULL REFERENCES volumes(id),
trigger TEXT NOT NULL CHECK (trigger IN ('change','interval')),
triggering_run_id INTEGER REFERENCES runs(id),
changed INTEGER NOT NULL CHECK (changed IN (0, 1)),
started_at_ns INTEGER NOT NULL,
ended_at_ns INTEGER,
status TEXT NOT NULL CHECK (status IN ('running','success','failed')),
exit_code INTEGER,
error TEXT,
CHECK (
(trigger = 'change' AND triggering_run_id IS NOT NULL) OR
(trigger = 'interval' AND triggering_run_id IS NULL)
)
);

CREATE INDEX idx_hook_runs_volume_trigger ON hook_runs(volume_id, trigger, started_at_ns);

CREATE TABLE nodes (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL UNIQUE,
endpoint TEXT,
public_key_fingerprint TEXT
);

CREATE TABLE peer_sync_state (
volume_id INTEGER NOT NULL REFERENCES volumes(id),
peer_node_id INTEGER NOT NULL REFERENCES nodes(id),
last_shared_run_id INTEGER,
last_synced_at INTEGER NOT NULL,
PRIMARY KEY (volume_id, peer_node_id)
);

CREATE TABLE peer_sync_state_history (
id INTEGER PRIMARY KEY,
volume_id INTEGER NOT NULL REFERENCES volumes(id),
peer_node_id INTEGER NOT NULL REFERENCES nodes(id),
last_shared_run_id INTEGER,
last_synced_at INTEGER NOT NULL,
at_ns INTEGER NOT NULL
);

CREATE INDEX idx_peer_sync_history_pair
ON peer_sync_state_history(volume_id, peer_node_id);

CREATE TABLE "runs" (
id INTEGER PRIMARY KEY,
kind TEXT NOT NULL CHECK (kind IN ('index','sync','restore','audit')),
volume_id INTEGER REFERENCES volumes(id),
destination TEXT,
started_at_ns INTEGER NOT NULL,
ended_at_ns INTEGER,
status TEXT NOT NULL CHECK (status IN ('running','success','failed','partial')),
error TEXT,
file_count INTEGER NOT NULL DEFAULT 0,
peer_node_id INTEGER REFERENCES nodes(id),
correlated_run_id INTEGER, shallow INTEGER CHECK (shallow IS NULL OR shallow IN (0, 1)),
CHECK (
(kind IN ('index','audit') AND destination IS NULL) OR
(kind IN ('sync','restore') AND destination IS NOT NULL AND destination != '')
)
);

CREATE INDEX idx_runs_destination ON runs(destination) WHERE destination IS NOT NULL;

CREATE INDEX idx_runs_volume_started ON runs(volume_id, started_at_ns);

CREATE TABLE runs_audit (
id INTEGER PRIMARY KEY,
run_id INTEGER NOT NULL REFERENCES runs(id),
transition TEXT NOT NULL,
operator TEXT,
at_ns INTEGER NOT NULL,
note TEXT
);

CREATE INDEX idx_runs_audit_run ON runs_audit(run_id);

CREATE TABLE schema_version (version INTEGER NOT NULL PRIMARY KEY);

CREATE TABLE volumes (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL UNIQUE,
path TEXT NOT NULL
);

44 changes: 44 additions & 0 deletions store/schema_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package store

import (
"context"
"flag"
"os"
"testing"
)

// updateSchema is the -update-schema test flag. When it is set
// (`go test ./store -update-schema`), TestSchemaSnapshot rewrites the
// checked-in snapshot from the live migration chain instead of comparing
// against it. This is the sanctioned way to refresh schema.sql after adding
// a migration.
var (
	updateSchema = flag.Bool("update-schema", false, "rewrite store/schema.sql from the current migration chain")
)

// schemaSnapshotPath is the snapshot file name that TestSchemaSnapshot
// reads or rewrites.
const (
	schemaSnapshotPath = "schema.sql"
)

// TestSchemaSnapshot keeps store/schema.sql in lockstep with the migration
// chain. The checked-in snapshot has to match, byte for byte, the text
// canonicalSchemaSQL generates by migrating a fresh database to
// SchemaVersion. A migration that reshapes the schema without a regenerated
// snapshot fails here, so the human/agent-readable schema stays honest
// without anyone having to remember to refresh it. With -update-schema the
// test rewrites the file instead of comparing.
func TestSchemaSnapshot(t *testing.T) {
	canonical, err := canonicalSchemaSQL(context.Background())
	if err != nil {
		t.Fatalf("generate canonical schema: %v", err)
	}

	if !*updateSchema {
		// Compare mode: the file on disk must equal the generated text.
		onDisk, readErr := os.ReadFile(schemaSnapshotPath)
		if readErr != nil {
			t.Fatalf("read %s (run `go test ./store -update-schema` to create it): %v", schemaSnapshotPath, readErr)
		}
		if string(onDisk) != canonical {
			t.Errorf("%s is stale — run `go test ./store -update-schema` to regenerate it", schemaSnapshotPath)
		}
		return
	}

	// Update mode: overwrite the snapshot and skip the comparison.
	if writeErr := os.WriteFile(schemaSnapshotPath, []byte(canonical), 0o644); writeErr != nil {
		t.Fatalf("write %s: %v", schemaSnapshotPath, writeErr)
	}
	t.Logf("wrote %s", schemaSnapshotPath)
}
Loading