From c4e874403679fb888bf8a13527ea95bd8211056a Mon Sep 17 00:00:00 2001 From: "Yoshiaki Ueda (bootjp)" Date: Thu, 4 Jun 2026 16:13:45 +0900 Subject: [PATCH 1/4] backup: M4-2b commit A - keymap loader (no integration yet) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per design doc PR #913 §'Per-bucket keymap loading'. Adds the read-side helper that inverts the S3 decoder's KEYMAP.jsonl writes, plus the per- record invariant validator. No call sites wired up yet — commit B integrates this into the encodeBucketObjects walk. New (encode_s3_collision.go): - loadBucketKeymap: opens /KEYMAP.jsonl via the read-side pipeline (root.Lstat -> refuseHardLink -> root.Open). MUST NOT use OpenSidecarFile - that helper is write-side on every platform (O_WRONLY|O_CREATE|O_TRUNC on Windows/fallback, O_WRONLY|O_CREATE + Truncate(0) on Unix) and would erase the file before reading. Codex P2 v913 v3. - readKeymapStrict: shared iterator over KeymapReader. Diverges from LoadKeymap (last-wins) — duplicate Encoded means corrupt dump, fail closed. Claude v913 v2. - validateKeymapRecord: per-record invariants — Kind in closed set {S3LeafData, MetaCollision, SHAFallback}; KindS3LeafData encoded must end in S3LeafDataSuffix; KindMetaCollision original must end in S3MetaSuffixReserved. - validateKeymapReservedRoot: rejects original keys whose first slash-split path component is _orphans or _incomplete_uploads. Whole _ namespace is NOT reserved - legit user keys like _foo / _foo/bar / nested/_orphans/x pass. Codex P2 v913 v1. - verifyKeymapTargetsExist: orphan-record detector (Encoded with no on-disk body). Runs once after load so operator sees the keymap inconsistency directly rather than as a later missing-sidecar error. O(N) Lstats per bucket. - resolveObjectKeyFromRel: lookup helper. Slash-form conversion + keymap lookup; nil keymap or miss returns slash-form rel as-is (no-collision M4-2a path). Tests (encode_s3_collision_test.go): - TestLoadBucketKeymap_HappyPath - three kinds round-trip into map. - TestLoadBucketKeymap_DuplicateEncoded - fail-closed regression for the LoadKeymap-divergence contract (claude v913 v2). - TestLoadBucketKeymap_MalformedJSON - wraps ErrInvalidKeymapRecord. - TestLoadBucketKeymap_MissingFile - (nil, nil) defensive return. - TestValidateKeymapRecord_KindLeafSuffix - 3-case table. - TestValidateKeymapRecord_KindMetaSuffix - good + bad. - TestValidateKeymapRecord_UnknownKind - forward-compat guard (claude v913 v2). - TestValidateKeymapReservedRoot - 9-case table covering boundary cases: _orphans (reject), _foo (accept), _orphansFoo (accept), nested/_orphans (accept), etc. Codex P2 v913 v1. - TestVerifyKeymapTargetsExist - present + orphan subtests. - TestResolveObjectKeyFromRel - 4-case lookup contract. Caller audit: pure additions; no existing call site changed in this commit. The loader / validator are unreachable from the encoder graph until commit B wires them into encodeBucketObjects. go test + golangci-lint clean on ./internal/backup/. --- internal/backup/encode_s3_collision.go | 239 ++++++++++++++ internal/backup/encode_s3_collision_test.go | 348 ++++++++++++++++++++ 2 files changed, 587 insertions(+) create mode 100644 internal/backup/encode_s3_collision.go create mode 100644 internal/backup/encode_s3_collision_test.go diff --git a/internal/backup/encode_s3_collision.go b/internal/backup/encode_s3_collision.go new file mode 100644 index 00000000..5cda2111 --- /dev/null +++ b/internal/backup/encode_s3_collision.go @@ -0,0 +1,239 @@ +package backup + +import ( + "io" + "os" + "path/filepath" + "strings" + + "github.com/cockroachdb/errors" +) + +// encode_s3_collision.go is the M4-2b helper that inverts the S3 +// decoder's collision-rename writes. The decoder writes a per-bucket +// KEYMAP.jsonl with one record per renamed object (KindS3LeafData, +// KindMetaCollision, KindSHAFallback); this file is the read-side that +// turns that file back into a map[encoded-rel-path]KeymapRecord the +// object walk consults to recover original S3 keys. +// +// Design: docs/design/2026_06_03_proposed_s3_collision_reversal.md. +// +// File-system safety: loadBucketKeymap re-runs the read-side +// Lstat → refuseHardLink → Open sequence rather than reusing +// OpenSidecarFile. OpenSidecarFile is write-side +// (O_WRONLY|O_CREATE|O_TRUNC on Windows/fallback, O_WRONLY|O_CREATE +// + Truncate(0) on Unix) and would erase KEYMAP.jsonl before the +// reader could see it. Codex P2 v913 v3. +// +// Duplicate-key contract: this loader DIVERGES from LoadKeymap +// (keymap.go:245) which is last-wins. The S3 decoder's recordKeymap +// writes exactly one entry per renamed object, so a duplicate +// `Encoded` value in a tracker file means the dump is corrupt — the +// loader fails closed. Claude v913 v2 caught this contract mismatch. + +// keymapReservedRoots is the closed set of first-path-component +// names disallowed as the head of a KEYMAP `Original` key. Only +// _orphans/ and _incomplete_uploads/ are dump-control reserved +// subtrees; legitimate user object keys like "_foo" or "_foo/bar" +// remain valid. Codex P2 v913 v1. +var keymapReservedRoots = map[string]struct{}{ + "_orphans": {}, + "_incomplete_uploads": {}, +} + +// keymapAllowedKinds is the closed set of Kind values M4-2b honors. +// A Kind outside this set fails closed so a hand-edited dump that +// injects a novel discriminator cannot silently bypass invariants +// (forward-compat guard, claude v913 v2). +var keymapAllowedKinds = map[string]struct{}{ + KindS3LeafData: {}, + KindMetaCollision: {}, + KindSHAFallback: {}, +} + +// loadBucketKeymap reads /KEYMAP.jsonl into a +// map[Encoded]KeymapRecord. Precondition: the caller has already +// verified via isKeymapCollisionTracker that the file exists, is a +// regular file, and has no companion .elastickv-meta.json sidecar +// (sidecar-paired KEYMAP.jsonl is a legitimate user object handled +// by the normal object walk; codex P2 v913 v2). +// +// All four invariants in the design doc's "Error contract" section +// fire from here — duplicate Encoded, malformed JSON, suffix- +// mismatch per Kind, reserved-root, and unknown Kind. Orphan-record +// detection (Encoded with no on-disk body) happens in +// verifyKeymapTargetsExist after the load, because it requires the +// full keymap to be parsed first. +func (e *S3RecordEncoder) loadBucketKeymap(root *os.Root, bucketDir string) (map[string]KeymapRecord, error) { + keymapRel := filepath.Join(bucketDir, "KEYMAP.jsonl") + // Read-side Lstat + refuseHardLink + Open. NOT OpenSidecarFile + // (write-side; truncates before read). Mirrors openRootRegular + // (encode_s3_objects.go:260) but tolerates os.ErrNotExist as a + // no-op return (defensive — caller's precondition already + // covered the Lstat once, but interleaved with our re-Lstat the + // file could vanish; treat that as "no records" rather than + // failing closed). + linfo, err := root.Lstat(keymapRel) + if errors.Is(err, os.ErrNotExist) { + return nil, nil + } + if err != nil { + return nil, errors.WithStack(err) + } + if !linfo.Mode().IsRegular() { + return nil, errors.Wrapf(ErrS3EncodeNotRegular, "%s (mode=%s)", keymapRel, linfo.Mode()) + } + if err := refuseHardLink(linfo, keymapRel); err != nil { + return nil, err + } + f, err := root.Open(keymapRel) + if err != nil { + return nil, errors.WithStack(err) + } + defer func() { _ = f.Close() }() + + return readKeymapStrict(f, keymapRel) +} + +// readKeymapStrict iterates a KEYMAP.jsonl stream and assembles the +// map, failing closed on duplicate Encoded values (vs LoadKeymap's +// last-wins) and on every per-record invariant. Split out of +// loadBucketKeymap so the parent stays under the cyclop limit. +func readKeymapStrict(f io.Reader, keymapRel string) (map[string]KeymapRecord, error) { + out := make(map[string]KeymapRecord) + rd := NewKeymapReader(f) + for { + rec, ok, err := rd.Next() + if err != nil { + return nil, err + } + if !ok { + return out, nil + } + if _, dup := out[rec.Encoded]; dup { + return nil, errors.Wrapf(ErrInvalidKeymapRecord, + "%s: duplicate encoded segment %q (S3 decoder writes one record per rename; a duplicate means the dump is corrupt)", + keymapRel, rec.Encoded) + } + if err := validateKeymapRecord(rec); err != nil { + return nil, errors.Wrapf(err, "%s", keymapRel) + } + out[rec.Encoded] = rec + } +} + +// validateKeymapRecord enforces the per-record invariants of the +// design doc's "Error contract" section: Kind must be in the closed +// set; KindS3LeafData entries must end in S3LeafDataSuffix; +// KindMetaCollision entries' original key must end in +// S3MetaSuffixReserved; no entry's original key may target a +// reserved top-level dump subtree. +func validateKeymapRecord(rec KeymapRecord) error { + if _, ok := keymapAllowedKinds[rec.Kind]; !ok { + return errors.Wrapf(ErrInvalidKeymapRecord, + "unknown kind %q for encoded segment %q", rec.Kind, rec.Encoded) + } + originalBytes, err := rec.Original() + if err != nil { + return err + } + original := string(originalBytes) + if err := validateKeymapReservedRoot(rec, original); err != nil { + return err + } + switch rec.Kind { + case KindS3LeafData: + if !strings.HasSuffix(rec.Encoded, S3LeafDataSuffix) { + return errors.Wrapf(ErrInvalidKeymapRecord, + "KindS3LeafData encoded %q missing %q suffix", rec.Encoded, S3LeafDataSuffix) + } + case KindMetaCollision: + if !strings.HasSuffix(original, S3MetaSuffixReserved) { + return errors.Wrapf(ErrInvalidKeymapRecord, + "KindMetaCollision original %q missing %q suffix", original, S3MetaSuffixReserved) + } + case KindSHAFallback: + // Design §"KindSHAFallback policy for S3": currently + // reserved for the (not-yet-implemented) case where a + // single S3 key segment exceeds EncodeSegment's 240-byte + // filesystem ceiling. The decoder does not emit it for S3 + // today, but if a future decoder does, the encoder's + // full-rel-path lookup handles it transparently. No + // additional invariant beyond the kind+reserved-root + // checks above. + } + return nil +} + +// validateKeymapReservedRoot rejects an original key whose first +// slash-split path component is exactly one of the dump-control +// reserved subtrees (_orphans, _incomplete_uploads). Codex P2 v913 v1: +// the whole `_` namespace is NOT reserved, only those two named +// subtrees. Legit user keys like "_foo" or "_foo/bar" or +// "nested/_orphans/x" pass. +func validateKeymapReservedRoot(rec KeymapRecord, original string) error { + if original == "" { + return nil + } + firstSegment, _, _ := strings.Cut(original, "/") + if _, reserved := keymapReservedRoots[firstSegment]; reserved { + return errors.Wrapf(ErrInvalidKeymapRecord, + "original key %q targets reserved dump subtree %q (encoded=%q, kind=%q)", + original, firstSegment, rec.Encoded, rec.Kind) + } + return nil +} + +// verifyKeymapTargetsExist scans the loaded keymap and confirms each +// record's Encoded value points to an actual on-disk file under +// bucketDir. Orphan records (Encoded with no body) surface here +// rather than as a missing-sidecar error from the walker, so the +// operator sees the keymap inconsistency directly. O(N) Lstats per +// bucket, run once at load time. +func (e *S3RecordEncoder) verifyKeymapTargetsExist(root *os.Root, bucketDir string, km map[string]KeymapRecord) error { + for encoded := range km { + // Encoded is a bucket-relative path in slash-form (that's + // what the decoder's recordKeymap stored; see s3.go:728 + // — the call site passes resolveObjectFilename's output, + // which is the slash-form S3 key + optional rename + // suffix). Convert to filepath form before Lstat. + rel := filepath.Join(bucketDir, filepath.FromSlash(encoded)) + _, err := root.Lstat(rel) + if errors.Is(err, os.ErrNotExist) { + return errors.Wrapf(ErrInvalidKeymapRecord, + "orphan keymap record: encoded %q has no on-disk file under %s", encoded, bucketDir) + } + if err != nil { + return errors.WithStack(err) + } + } + return nil +} + +// resolveObjectKeyFromRel maps an on-disk bucket-relative path to +// the original S3 object key. The keymap is the loaded +// per-bucket map (nil for buckets with no KEYMAP.jsonl); a hit +// returns the decoded original key bytes as a string, a miss +// returns the slash-form of objRel itself (the no-collision case +// M4-2a already covered). +// +// The lookup uses the slash-form of objRel because the decoder's +// recordKeymap call site (s3.go:728) stored that exact form as +// the Encoded field (the encodedFilename argument is the +// resolveObjectFilename return, which is plain slash-form S3 +// path + optional suffix). +func resolveObjectKeyFromRel(keymap map[string]KeymapRecord, objRel string) (string, error) { + slashRel := filepath.ToSlash(objRel) + if keymap == nil { + return slashRel, nil + } + rec, ok := keymap[slashRel] + if !ok { + return slashRel, nil + } + originalBytes, err := rec.Original() + if err != nil { + return "", err + } + return string(originalBytes), nil +} diff --git a/internal/backup/encode_s3_collision_test.go b/internal/backup/encode_s3_collision_test.go new file mode 100644 index 00000000..62008d26 --- /dev/null +++ b/internal/backup/encode_s3_collision_test.go @@ -0,0 +1,348 @@ +package backup + +import ( + "encoding/base64" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/cockroachdb/errors" +) + +// writeS3KeymapTracker writes /s3//KEYMAP.jsonl +// with the given records and NO companion .elastickv-meta.json +// sidecar, so isKeymapCollisionTracker classifies it as a tracker. +func writeS3KeymapTracker(t *testing.T, root, bucket string, records []KeymapRecord) { + t.Helper() + bucketDir := filepath.Join(root, "s3", EncodeSegment([]byte(bucket))) + if err := os.MkdirAll(bucketDir, 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + f, err := os.Create(filepath.Join(bucketDir, "KEYMAP.jsonl")) + if err != nil { + t.Fatalf("Create KEYMAP.jsonl: %v", err) + } + defer func() { _ = f.Close() }() + w := NewKeymapWriter(f) + for _, rec := range records { + if err := w.Write(rec); err != nil { + t.Fatalf("Write %+v: %v", rec, err) + } + } + if err := w.Close(); err != nil { + t.Fatalf("Close: %v", err) + } +} + +// loadKeymapForBucketB is a thin test wrapper around the new loader +// that opens the standard test bucket "b" via os.OpenRoot the same +// way Encode does. Bucket name is hardcoded since every loader test +// uses the same fixture name. +func loadKeymapForBucketB(t *testing.T, inRoot string) (map[string]KeymapRecord, error) { + t.Helper() + enc := NewS3RecordEncoder(inRoot) + r, err := os.OpenRoot(filepath.Join(inRoot, "s3")) + if err != nil { + t.Fatalf("OpenRoot: %v", err) + } + defer func() { _ = r.Close() }() + return enc.loadBucketKeymap(r, EncodeSegment([]byte("b"))) +} + +func leafRecord(encoded, originalSlashKey string) KeymapRecord { + return KeymapRecord{ + Encoded: encoded, + OriginalB64: base64.RawURLEncoding.EncodeToString([]byte(originalSlashKey)), + Kind: KindS3LeafData, + } +} + +func metaRecord(encoded, originalSlashKey string) KeymapRecord { + return KeymapRecord{ + Encoded: encoded, + OriginalB64: base64.RawURLEncoding.EncodeToString([]byte(originalSlashKey)), + Kind: KindMetaCollision, + } +} + +// TestLoadBucketKeymap_HappyPath verifies that three valid records +// (one per accepted Kind) load into a map keyed by Encoded. +func TestLoadBucketKeymap_HappyPath(t *testing.T) { + t.Parallel() + in := t.TempDir() + const bucket = "b" + records := []KeymapRecord{ + leafRecord("path/to"+S3LeafDataSuffix, "path/to"), + metaRecord("renamed.user-data", "real"+S3MetaSuffixReserved), + { + Encoded: "shorthash__truncated", + OriginalB64: base64.RawURLEncoding.EncodeToString([]byte("very-long-original-key")), + Kind: KindSHAFallback, + }, + } + writeS3KeymapTracker(t, in, bucket, records) + + got, err := loadKeymapForBucketB(t, in) + if err != nil { + t.Fatalf("loadBucketKeymap: %v", err) + } + if len(got) != 3 { + t.Fatalf("loaded %d records, want 3", len(got)) + } + for _, rec := range records { + if loaded, ok := got[rec.Encoded]; !ok { + t.Errorf("missing record for encoded %q", rec.Encoded) + } else if loaded.Kind != rec.Kind { + t.Errorf("record %q: kind = %q, want %q", rec.Encoded, loaded.Kind, rec.Kind) + } + } +} + +// TestLoadBucketKeymap_DuplicateEncoded pins the divergence from +// LoadKeymap's last-wins behavior: a duplicate Encoded value means +// the S3 decoder wrote two distinct rename targets for the same +// on-disk name, which is a corrupt dump the encoder cannot +// disambiguate. Claude v913 v2 documented this contract. +func TestLoadBucketKeymap_DuplicateEncoded(t *testing.T) { + t.Parallel() + in := t.TempDir() + writeS3KeymapTracker(t, in, "b", []KeymapRecord{ + leafRecord("path/to"+S3LeafDataSuffix, "path/to"), + leafRecord("path/to"+S3LeafDataSuffix, "elsewhere"), + }) + _, err := loadKeymapForBucketB(t, in) + if !errors.Is(err, ErrInvalidKeymapRecord) { + t.Fatalf("err = %v, want wrap of ErrInvalidKeymapRecord", err) + } + if !strings.Contains(err.Error(), "duplicate encoded segment") { + t.Fatalf("err = %v, want duplicate-encoded message", err) + } +} + +// TestLoadBucketKeymap_MalformedJSON pins that a corrupted JSONL +// line surfaces ErrInvalidKeymapRecord (via the KeymapReader, not a +// loader-side check — same outcome either way for the operator). +func TestLoadBucketKeymap_MalformedJSON(t *testing.T) { + t.Parallel() + in := t.TempDir() + const bucket = "b" + bucketDir := filepath.Join(in, "s3", EncodeSegment([]byte(bucket))) + if err := os.MkdirAll(bucketDir, 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + if err := os.WriteFile(filepath.Join(bucketDir, "KEYMAP.jsonl"), + []byte("{this is not json}\n"), 0o600); err != nil { + t.Fatalf("WriteFile: %v", err) + } + _, err := loadKeymapForBucketB(t, in) + if !errors.Is(err, ErrInvalidKeymapRecord) { + t.Fatalf("err = %v, want ErrInvalidKeymapRecord", err) + } +} + +// TestLoadBucketKeymap_MissingFile pins that a missing +// KEYMAP.jsonl returns (nil, nil) rather than failing. The caller +// (isKeymapCollisionTracker) is the precondition gate; this +// behavior is defensive against an interleaving racy unlink. +func TestLoadBucketKeymap_MissingFile(t *testing.T) { + t.Parallel() + in := t.TempDir() + bucketDir := filepath.Join(in, "s3", EncodeSegment([]byte("b"))) + if err := os.MkdirAll(bucketDir, 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + got, err := loadKeymapForBucketB(t, in) + if err != nil { + t.Fatalf("loadBucketKeymap on missing KEYMAP.jsonl: %v", err) + } + if got != nil { + t.Fatalf("got = %v, want nil for missing keymap", got) + } +} + +// TestValidateKeymapRecord_KindLeafSuffix pins gate "KindS3LeafData +// encoded must end in .elastickv-leaf-data". +func TestValidateKeymapRecord_KindLeafSuffix(t *testing.T) { + t.Parallel() + cases := []struct { + name string + encoded string + wantErr bool + }{ + {"valid suffix", "path/to" + S3LeafDataSuffix, false}, + {"missing suffix", "path/to", true}, + {"wrong suffix", "path/to.user-data", true}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + t.Parallel() + rec := leafRecord(c.encoded, "path/to") + err := validateKeymapRecord(rec) + if c.wantErr && !errors.Is(err, ErrInvalidKeymapRecord) { + t.Fatalf("err = %v, want ErrInvalidKeymapRecord", err) + } + if !c.wantErr && err != nil { + t.Fatalf("unexpected err = %v", err) + } + }) + } +} + +// TestValidateKeymapRecord_KindMetaSuffix pins gate +// "KindMetaCollision original must end in .elastickv-meta.json". +func TestValidateKeymapRecord_KindMetaSuffix(t *testing.T) { + t.Parallel() + good := metaRecord("renamed.user-data", "real"+S3MetaSuffixReserved) + if err := validateKeymapRecord(good); err != nil { + t.Fatalf("valid record: unexpected err = %v", err) + } + bad := metaRecord("renamed.user-data", "real-without-suffix") + if err := validateKeymapRecord(bad); !errors.Is(err, ErrInvalidKeymapRecord) { + t.Fatalf("err = %v, want ErrInvalidKeymapRecord", err) + } +} + +// TestValidateKeymapRecord_UnknownKind pins the forward-compat +// guard: an unrecognized Kind value fails closed rather than +// silently passing. +func TestValidateKeymapRecord_UnknownKind(t *testing.T) { + t.Parallel() + rec := KeymapRecord{ + Encoded: "x", + OriginalB64: base64.RawURLEncoding.EncodeToString([]byte("orig")), + Kind: "future-rename-kind", + } + err := validateKeymapRecord(rec) + if !errors.Is(err, ErrInvalidKeymapRecord) { + t.Fatalf("err = %v, want ErrInvalidKeymapRecord", err) + } +} + +// TestValidateKeymapReservedRoot covers the boundary cases codex P2 +// v913 v1 caught: the whole `_` namespace is NOT reserved, only +// _orphans/ and _incomplete_uploads/ at top level. +func TestValidateKeymapReservedRoot(t *testing.T) { + t.Parallel() + cases := []struct { + name string + key string + wantErr bool + }{ + {"_orphans top-level", "_orphans/x", true}, + {"_incomplete_uploads top-level", "_incomplete_uploads/x", true}, + {"_orphans bare (no slash, first segment matches)", "_orphans", true}, + {"_foo (legit user key with leading underscore)", "_foo", false}, + {"_foo/bar", "_foo/bar", false}, + {"_orphansFoo (longer name, NOT reserved)", "_orphansFoo/x", false}, + {"nested _orphans (only top-level reserved)", "nested/_orphans/x", false}, + {"plain user key", "path/to", false}, + {"empty key", "", false}, // skip case; reserved check only fires on non-empty + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + t.Parallel() + rec := KeymapRecord{ + Encoded: "anything", + OriginalB64: base64.RawURLEncoding.EncodeToString([]byte(c.key)), + Kind: KindS3LeafData, + } + err := validateKeymapReservedRoot(rec, c.key) + if c.wantErr && !errors.Is(err, ErrInvalidKeymapRecord) { + t.Fatalf("err = %v, want ErrInvalidKeymapRecord", err) + } + if !c.wantErr && err != nil { + t.Fatalf("unexpected err = %v", err) + } + }) + } +} + +// verifyKeymapTargetsExistCase runs one verifyKeymapTargetsExist +// invocation against a freshly-opened os.Root so the resource +// lifetime stays tied to the subtest (parent-level defer + parallel +// subtests would close the root before the subtests run). +func verifyKeymapTargetsExistCase(t *testing.T, in string, km map[string]KeymapRecord) error { + t.Helper() + enc := NewS3RecordEncoder(in) + r, err := os.OpenRoot(filepath.Join(in, "s3")) + if err != nil { + t.Fatalf("OpenRoot: %v", err) + } + defer func() { _ = r.Close() }() + return enc.verifyKeymapTargetsExist(r, EncodeSegment([]byte("b")), km) +} + +// TestVerifyKeymapTargetsExist pins that a keymap record citing an +// on-disk file that doesn't exist surfaces as a wrapped +// ErrInvalidKeymapRecord, not as a later missing-sidecar error. +// Run once at load time so the operator sees the keymap +// inconsistency directly. +func TestVerifyKeymapTargetsExist(t *testing.T) { + t.Parallel() + in := t.TempDir() + bucketDir := filepath.Join(in, "s3", EncodeSegment([]byte("b"))) + if err := os.MkdirAll(filepath.Join(bucketDir, "path"), 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + if err := os.WriteFile(filepath.Join(bucketDir, "path", "to"+S3LeafDataSuffix), []byte("body"), 0o600); err != nil { + t.Fatalf("WriteFile body: %v", err) + } + + t.Run("present", func(t *testing.T) { + t.Parallel() + km := map[string]KeymapRecord{ + "path/to" + S3LeafDataSuffix: leafRecord("path/to"+S3LeafDataSuffix, "path/to"), + } + if err := verifyKeymapTargetsExistCase(t, in, km); err != nil { + t.Fatalf("verifyKeymapTargetsExist: %v", err) + } + }) + + t.Run("orphan record", func(t *testing.T) { + t.Parallel() + km := map[string]KeymapRecord{ + "missing/file" + S3LeafDataSuffix: leafRecord("missing/file"+S3LeafDataSuffix, "missing/file"), + } + err := verifyKeymapTargetsExistCase(t, in, km) + if !errors.Is(err, ErrInvalidKeymapRecord) { + t.Fatalf("err = %v, want ErrInvalidKeymapRecord", err) + } + if !strings.Contains(err.Error(), "orphan keymap record") { + t.Fatalf("err message = %v, want orphan-keymap-record", err) + } + }) +} + +// TestResolveObjectKeyFromRel covers the lookup contract: nil +// keymap is the no-collision case (just slash-convert); a hit +// returns decoded Original; a miss returns the slash-form rel. +func TestResolveObjectKeyFromRel(t *testing.T) { + t.Parallel() + keymap := map[string]KeymapRecord{ + "path/to" + S3LeafDataSuffix: leafRecord("path/to"+S3LeafDataSuffix, "path/to"), + } + cases := []struct { + name string + keymap map[string]KeymapRecord + objRel string + want string + }{ + {"nil keymap, no collision", nil, "path/to", "path/to"}, + {"empty keymap, no collision", map[string]KeymapRecord{}, "path/to", "path/to"}, + {"hit on leaf-data rename", keymap, "path/to" + S3LeafDataSuffix, "path/to"}, + {"miss returns rel as-is", keymap, "other/key", "other/key"}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + t.Parallel() + got, err := resolveObjectKeyFromRel(c.keymap, c.objRel) + if err != nil { + t.Fatalf("resolveObjectKeyFromRel: %v", err) + } + if got != c.want { + t.Fatalf("got %q, want %q", got, c.want) + } + }) + } +} From 2de1c94b80d624d6ace49171e35db99fd7db8054 Mon Sep 17 00:00:00 2001 From: "Yoshiaki Ueda (bootjp)" Date: Thu, 4 Jun 2026 16:16:13 +0900 Subject: [PATCH 2/4] backup: M4-2b commit B - integrate keymap into object walk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per design doc PR #913 §'Object-walk integration'. Wires the loader from commit A into encodeBucketObjects and threads the loaded keymap through walkObjects -> walkObjectEntry -> encodeObject. For each on-disk object whose bucket-relative path appears in the keymap, the encoder now emits records under the recovered original S3 key rather than the renamed on-disk path. Sentinel removal: - ErrS3EncodeUnsupportedCollision removed - the encoder now reverses the collision renames it used to fail closed on. All wrap sites internal to this package were inside encodeBucketObjects (the now- replaced fail-closed branch) plus the test deleted below. grep confirms no external callers under /Users/bootjp/src/elastickv2. Integration points: - encodeBucketObjects: when isKeymapCollisionTracker reports true, loadBucketKeymap + verifyKeymapTargetsExist run before the walk. Both errors wrap to ErrS3EncodeInvalidBucket so the CLI's ErrEncodeAdapterData mark routes them to exit-2 unchanged. When the tracker is absent the keymap stays nil and the walk runs the M4-2a path byte-identically. - walkObjects / walkObjectEntry / walkObjectSubdir / encodeObject all gain a 'keymap map[string]KeymapRecord' parameter (nil-safe). encodeObject swaps filepath.ToSlash(objRel) for resolveObjectKeyFromRel(keymap, objRel). - walkObjectEntry: explicit skip for the top-level 'KEYMAP.jsonl' filename when keymap is non-nil. Keeps the single disambiguation-decision contract in isKeymapCollisionTracker - the walker never needs to re-derive whether KEYMAP.jsonl is a tracker (loadBucketKeymap already consumed it). Comment update: - encode_snapshot.go:655 docstring no longer cites ErrS3EncodeUnsupportedCollision (now removed). Substituted ErrS3EncodeReservedPrefixCollision + ErrS3EncodeInvalidBucket which still exist and play the same exit-2-routing example role. Test removal: - TestS3EncodeRejectsKeymapCollisionTracker deleted. The fixture (KEYMAP.jsonl with no sidecar) is exactly what M4-2b now reverses; commit C will add a positive round-trip test on the same shape. Tests preserved unchanged: - TestS3EncodeKeymapObjectRoundTrip - legitimate user 'KEYMAP.jsonl' with sidecar still round-trips (proves isKeymapCollisionTracker's sidecar disambiguation gates correctly; codex P2 v913 v2). - TestS3EncodeLeafDataObjectRoundTrip - a real object whose key ends in .elastickv-leaf-data + sidecar + NO keymap entry continues to take the legacy no-collision path. Caller audit (semantic changes): - walkObjects / walkObjectEntry / walkObjectSubdir / encodeObject - all unexported; single production call chain from encodeBucketObjects. All updated in this commit. - ErrS3EncodeUnsupportedCollision: grep on /Users/bootjp/src/elastickv2 finds zero references after this commit. go test + golangci-lint clean on ./internal/backup/. --- internal/backup/encode_s3_objects.go | 78 ++++++++++++++--------- internal/backup/encode_s3_objects_test.go | 18 ------ internal/backup/encode_snapshot.go | 5 +- 3 files changed, 52 insertions(+), 49 deletions(-) diff --git a/internal/backup/encode_s3_objects.go b/internal/backup/encode_s3_objects.go index bf7a0165..a4b9c582 100644 --- a/internal/backup/encode_s3_objects.go +++ b/internal/backup/encode_s3_objects.go @@ -52,12 +52,6 @@ const ( s3RestoreUploadID = "elastickv-restore" ) -// ErrS3EncodeUnsupportedCollision is returned when the dump carries an -// object-name collision artifact (KEYMAP.jsonl or a .elastickv-leaf-data -// file) that this slice does not yet reverse — failing closed avoids -// emitting a record under the wrong object key. -var ErrS3EncodeUnsupportedCollision = errors.New("backup: s3 encode object-name collision not yet supported") - // ErrS3EncodeReservedPrefixCollision is returned when a user object key // collides with a reserved dump-control directory (_incomplete_uploads/ // or _orphans/) — the decoder writes such keys at their natural path @@ -79,24 +73,33 @@ var ErrS3EncodeInvalidManifest = errors.New("backup: s3 encode invalid object si // object's manifest + blob records. // // Object-name collisions: the decoder writes a top-level KEYMAP.jsonl -// recording shorter keys renamed to .elastickv-leaf-data. M4-2a does -// not reverse those renames, so a collision-tracker KEYMAP.jsonl fails -// closed. It is distinguished from a legitimate user object named -// "KEYMAP.jsonl" (which the decoder also emits verbatim) by the absence of -// a companion .elastickv-meta.json sidecar — the tracker has none. The -// .elastickv-leaf-data suffix is NOT special-cased: a real object whose -// key ends in it has a sidecar and round-trips normally, and any actual -// collision is already gated by the tracker check above (codex P1 #845). +// recording shorter keys renamed to .elastickv-leaf-data (and the +// --rename-collisions variants for .elastickv-meta.json-suffixed user +// keys). M4-2b reverses those renames: when isKeymapCollisionTracker +// reports the tracker is present, loadBucketKeymap reads the file and +// the walk consults the resulting map to recover original S3 keys. +// A legitimate user object literally named "KEYMAP.jsonl" (which has +// a companion sidecar) keeps the M4-2a normal-object-body path — +// isKeymapCollisionTracker disambiguates by sidecar absence. The +// .elastickv-leaf-data suffix on a real object key (sidecar present, +// no keymap entry) likewise stays on the normal path. Design doc PR +// #913. func (e *S3RecordEncoder) encodeBucketObjects(b *snapshotBuilder, root *os.Root, bucketDir, bucketName string) error { tracker, err := e.isKeymapCollisionTracker(root, bucketDir) if err != nil { return err } + var keymap map[string]KeymapRecord if tracker { - return errors.Wrapf(ErrS3EncodeUnsupportedCollision, - "%s: collision-rename KEYMAP.jsonl present", bucketDir) + keymap, err = e.loadBucketKeymap(root, bucketDir) + if err != nil { + return errors.Wrapf(ErrS3EncodeInvalidBucket, "%s: %v", bucketDir, err) + } + if err := e.verifyKeymapTargetsExist(root, bucketDir, keymap); err != nil { + return errors.Wrapf(ErrS3EncodeInvalidBucket, "%s: %v", bucketDir, err) + } } - return e.walkObjects(b, root, bucketDir, bucketName, "") + return e.walkObjects(b, root, bucketDir, bucketName, "", keymap) } // isKeymapCollisionTracker reports whether the bucket's top-level @@ -174,14 +177,14 @@ func reservedDirHoldsSidecar(root *os.Root, dirRel string) (bool, error) { // regular file as an object body whose key is its path relative to // bucketDir. The reserved top-level entries (_bucket.json, // _incomplete_uploads/, _orphans/) are skipped. -func (e *S3RecordEncoder) walkObjects(b *snapshotBuilder, root *os.Root, bucketDir, bucketName, rel string) error { +func (e *S3RecordEncoder) walkObjects(b *snapshotBuilder, root *os.Root, bucketDir, bucketName, rel string, keymap map[string]KeymapRecord) error { entries, err := readRootSubdirEntries(root, filepath.Join(bucketDir, rel)) if err != nil { return err } for _, ent := range entries { childRel := filepath.Join(rel, ent.Name()) - if err := e.walkObjectEntry(b, root, bucketDir, bucketName, rel, childRel, ent); err != nil { + if err := e.walkObjectEntry(b, root, bucketDir, bucketName, rel, childRel, ent, keymap); err != nil { return err } } @@ -191,23 +194,33 @@ func (e *S3RecordEncoder) walkObjects(b *snapshotBuilder, root *os.Root, bucketD // walkObjectEntry classifies one directory entry: reserved skips, // sidecars, collision artifacts (fail closed), sub-directories (recurse), // or an object body. -func (e *S3RecordEncoder) walkObjectEntry(b *snapshotBuilder, root *os.Root, bucketDir, bucketName, rel, childRel string, ent os.DirEntry) error { +func (e *S3RecordEncoder) walkObjectEntry(b *snapshotBuilder, root *os.Root, bucketDir, bucketName, rel, childRel string, ent os.DirEntry, keymap map[string]KeymapRecord) error { name := ent.Name() if ent.IsDir() { - return e.walkObjectSubdir(b, root, bucketDir, bucketName, rel, childRel) + return e.walkObjectSubdir(b, root, bucketDir, bucketName, rel, childRel, keymap) } switch { case rel == "" && name == "_bucket.json": return nil + case rel == "" && name == "KEYMAP.jsonl" && keymap != nil: + // The collision-tracker file itself, NOT a user object. The + // tracker has no companion sidecar (encodeObject would fail + // closed reading a missing sidecar); explicitly skip it here so + // the disambiguation contract in isKeymapCollisionTracker + // remains the single decision point. A legitimate user object + // named KEYMAP.jsonl is handled by the default branch because + // keymap stays nil in that case (isKeymapCollisionTracker + // returned false). + return nil case strings.HasSuffix(name, S3MetaSuffixReserved): return nil // sidecar, handled with its body default: // Any other regular file (including a user object literally named // "KEYMAP.jsonl" or ending in .elastickv-leaf-data) is an object - // body; encodeObject reads its sidecar and fails closed if absent. - // A sidecar-less collision-tracker KEYMAP.jsonl was already gated - // in encodeBucketObjects. - return e.encodeObject(b, root, bucketDir, bucketName, childRel) + // body; encodeObject reads its sidecar and fails closed if + // absent. The keymap (if present) is consulted in encodeObject + // to recover the original S3 key for renamed entries. + return e.encodeObject(b, root, bucketDir, bucketName, childRel, keymap) } } @@ -220,10 +233,10 @@ func (e *S3RecordEncoder) walkObjectEntry(b *snapshotBuilder, root *os.Root, buc // those) also lands here; its sidecar makes that detectable, and we // fail closed rather than silently drop the user data (codex P1 #842 // follow-up). -func (e *S3RecordEncoder) walkObjectSubdir(b *snapshotBuilder, root *os.Root, bucketDir, bucketName, rel, childRel string) error { +func (e *S3RecordEncoder) walkObjectSubdir(b *snapshotBuilder, root *os.Root, bucketDir, bucketName, rel, childRel string, keymap map[string]KeymapRecord) error { name := filepath.Base(childRel) if rel != "" || (name != "_incomplete_uploads" && name != "_orphans") { - return e.walkObjects(b, root, bucketDir, bucketName, childRel) + return e.walkObjects(b, root, bucketDir, bucketName, childRel, keymap) } hasUserObject, err := reservedDirHoldsSidecar(root, filepath.Join(bucketDir, childRel)) if err != nil { @@ -239,8 +252,15 @@ func (e *S3RecordEncoder) walkObjectSubdir(b *snapshotBuilder, root *os.Root, bu // encodeObject reads one object's sidecar, streams its body into blob // records, and stages the manifest. The object key is the body's path // relative to bucketDir. -func (e *S3RecordEncoder) encodeObject(b *snapshotBuilder, root *os.Root, bucketDir, bucketName, objRel string) error { - objectKey := filepath.ToSlash(objRel) +func (e *S3RecordEncoder) encodeObject(b *snapshotBuilder, root *os.Root, bucketDir, bucketName, objRel string, keymap map[string]KeymapRecord) error { + // resolveObjectKeyFromRel does the slash-form conversion and + // keymap lookup. nil keymap or miss returns slash-form of objRel + // (the no-collision M4-2a path); a hit returns the decoded + // original key bytes from KEYMAP.jsonl. + objectKey, err := resolveObjectKeyFromRel(keymap, objRel) + if err != nil { + return errors.Wrapf(ErrS3EncodeInvalidBucket, "%s: %v", bucketDir, err) + } bodyRel := filepath.Join(bucketDir, objRel) sidecar, err := e.readObjectSidecar(root, bodyRel+S3MetaSuffixReserved) if err != nil { diff --git a/internal/backup/encode_s3_objects_test.go b/internal/backup/encode_s3_objects_test.go index 648141ca..d8098872 100644 --- a/internal/backup/encode_s3_objects_test.go +++ b/internal/backup/encode_s3_objects_test.go @@ -238,24 +238,6 @@ func TestS3EncodeKeymapDirPrefixObjectRoundTrip(t *testing.T) { } } -// TestS3EncodeRejectsKeymapCollisionTracker pins fail-closed when a bucket -// carries a collision-tracker KEYMAP.jsonl — identified by the ABSENCE of -// a companion .elastickv-meta.json sidecar (M4-2a does not reverse renames). -func TestS3EncodeRejectsKeymapCollisionTracker(t *testing.T) { - t.Parallel() - in := t.TempDir() - const bucket = "b" - writeS3Bucket(t, in, bucket, []byte(`{"format_version":1,"name":"b"}`)) - km := filepath.Join(in, "s3", EncodeSegment([]byte(bucket)), "KEYMAP.jsonl") - if err := os.WriteFile(km, []byte("{}\n"), 0o600); err != nil { - t.Fatalf("write KEYMAP: %v", err) - } - b := newSnapshotBuilder(s3EncTS) - if err := NewS3RecordEncoder(in).Encode(b); !errors.Is(err, ErrS3EncodeUnsupportedCollision) { - t.Fatalf("Encode err = %v, want ErrS3EncodeUnsupportedCollision", err) - } -} - // TestS3EncodeKeymapObjectRoundTrip pins that a legitimate user object // named "KEYMAP.jsonl" (one with a companion sidecar) round-trips, rather // than being mistaken for the collision tracker (codex P1 #845). diff --git a/internal/backup/encode_snapshot.go b/internal/backup/encode_snapshot.go index dfc38c43..6fef3c24 100644 --- a/internal/backup/encode_snapshot.go +++ b/internal/backup/encode_snapshot.go @@ -652,8 +652,9 @@ func adapterRunners() []adapterRunner { // route them to exit-2 (data-correctness) rather than exit-1 (user // error). The original adapter sentinel chain is preserved — callers // that errors.Is on ErrDDBEncodeInvalidSchema, -// ErrS3EncodeUnsupportedCollision, etc. still see those (codex P2 v9 -// #904; phantom-sentinel doc fix from claude v10 #904). +// ErrS3EncodeReservedPrefixCollision, ErrS3EncodeInvalidBucket, etc. +// still see those (codex P2 v9 #904; phantom-sentinel doc fix from +// claude v10 #904). func runAdapterEncoders(b *snapshotBuilder, opts EncodeOptions) ([]string, error) { var enabled []string for _, r := range adapterRunners() { From 92b14f56c5fca36e78d4d4dea336ce1324308b4e Mon Sep 17 00:00:00 2001 From: "Yoshiaki Ueda (bootjp)" Date: Thu, 4 Jun 2026 16:20:47 +0900 Subject: [PATCH 3/4] backup: M4-2b commit C - round-trip + integration test coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per design doc PR #913 §'Test plan'. Adds the 8 design-doc tests covering the end-to-end encoder behavior: - TestS3EncodeRoundTripsLeafDataCollision - both .elastickv-leaf-data (renamed shorter key) and /sub (longer key) round-trip; emitted !s3|obj|head| manifest records are under the recovered original S3 keys with no .elastickv-leaf-data suffix leak. - TestS3EncodeRoundTripsMetaCollision - the --rename-collisions=true variant for a user object whose key naturally ends in .elastickv-meta.json is reversed via KindMetaCollision. - TestS3EncodeRejectsKeymapTargetingReservedSubtree - 2-case sub-test matrix on _orphans/foo and _incomplete_uploads/x; integration-level pin for the design's reserved-subtree gate. - TestS3EncodeAcceptsKeymapWithUserUnderscoreKey - codex P2 v913 v1 narrow-reserved-rule: a single underscore prefix (_foo) is a legit user key and round-trips. - TestS3EncodeRejectsOrphanKeymapEntry - integration-level fail-closed for a keymap entry whose Encoded has no on-disk body. - TestS3EncodeRejectsDuplicateKeymapEntry - integration-level fail-closed for two records sharing an Encoded value. - TestS3EncodeRejectsMalformedKeymapJSON - integration-level fail-closed for an unparseable line. - TestS3EncodeMissingKeymapIsValidNoCollisionDump - regression that the M4-2a happy path is unchanged when no KEYMAP.jsonl is present. The integration tests assert at the !s3|obj|head| record-byte level via assertS3ManifestEmittedUnder rather than re-running the decoder. A full decoder round-trip would re-apply the renames (or reject the .elastickv-meta.json-suffixed key without --rename-collisions=true), obscuring whether the encoder recovered the original key. Helper shape adjustments to satisfy unparam: - assertS3ManifestEmittedUnder bakes 'b' as the bucket via the new collisionTestBucket package constant. - writeS3KeymapTracker drops its bucket parameter; uses the same shared constant. Existing tests preserved unchanged (the legitimacy gate from claude v913 v2 codex P2 still applies): - TestS3EncodeKeymapObjectRoundTrip - legitimate user object named KEYMAP.jsonl (with sidecar) still round-trips. - TestS3EncodeLeafDataObjectRoundTrip - a legit object key ending in .elastickv-leaf-data (with sidecar, no keymap entry) keeps the M4-2a no-collision path. go test + golangci-lint clean on ./internal/backup/. --- .../encode_s3_collision_roundtrip_test.go | 266 ++++++++++++++++++ internal/backup/encode_s3_collision_test.go | 11 +- 2 files changed, 272 insertions(+), 5 deletions(-) create mode 100644 internal/backup/encode_s3_collision_roundtrip_test.go diff --git a/internal/backup/encode_s3_collision_roundtrip_test.go b/internal/backup/encode_s3_collision_roundtrip_test.go new file mode 100644 index 00000000..35be2f59 --- /dev/null +++ b/internal/backup/encode_s3_collision_roundtrip_test.go @@ -0,0 +1,266 @@ +package backup + +import ( + "bytes" + "encoding/base64" + "os" + "path/filepath" + "testing" + + "github.com/bootjp/elastickv/internal/s3keys" + "github.com/cockroachdb/errors" +) + +// encode_s3_collision_roundtrip_test.go is the M4-2b commit C suite: +// end-to-end round-trips covering the 8 tests called out in design +// doc PR #913 §"Test plan". Unit-level loader / validator tests live +// alongside their helper in encode_s3_collision_test.go. + +// writeS3LeafDataRename writes both the renamed body +// (.elastickv-leaf-data) and its sidecar under the +// bucket, mirroring what the decoder produces when two object keys +// collide on a file/dir boundary. Caller is responsible for writing +// any colliding "subtree" objects (e.g. path/to/sub). +func writeS3LeafDataRename(t *testing.T, root, bucket, originalKey string, body, sidecar []byte) { + t.Helper() + writeS3Object(t, root, bucket, originalKey+S3LeafDataSuffix, body, sidecar) +} + +// writeS3MetaCollisionRename writes the meta-collision rename +// target () with its sidecar; the keymap +// records the original .elastickv-meta.json-suffixed user key. +func writeS3MetaCollisionRename(t *testing.T, root, bucket, renamedKey string, body, sidecar []byte) { + t.Helper() + writeS3Object(t, root, bucket, renamedKey, body, sidecar) +} + +// collisionTestBucket is the fixed bucket name every commit-C +// collision round-trip test uses. Kept as a constant since the +// unparam lint would otherwise flag the parameter as always equal +// to "b". +const collisionTestBucket = "b" + +// assertS3ManifestEmittedUnder verifies that the encoder emitted a +// !s3|obj|head|| manifest record (i.e. the +// original S3 key was recovered from the keymap rather than leaking +// the renamed on-disk filename) and that no record exists under the +// disk-form key. Tests the byte-shape contract directly without +// re-running the decoder, which would re-apply collision renames +// and obscure the assertion. +func assertS3ManifestEmittedUnder(t *testing.T, fsm []byte, originalKey, mustNotKey string) { + t.Helper() + entries, _, err := DecodeLiveEntries(bytes.NewReader(fsm)) + if err != nil { + t.Fatalf("DecodeLiveEntries: %v", err) + } + wantKey := s3keys.ObjectManifestKey(collisionTestBucket, s3RestoreGeneration, originalKey) + dontWantKey := s3keys.ObjectManifestKey(collisionTestBucket, s3RestoreGeneration, mustNotKey) + var found bool + for _, e := range entries { + if bytes.Equal(e.UserKey, wantKey) { + found = true + } + if bytes.Equal(e.UserKey, dontWantKey) { + t.Errorf("emitted manifest for renamed disk key %q (should be recovered to %q)", mustNotKey, originalKey) + } + } + if !found { + t.Fatalf("no manifest record under recovered original key %q", originalKey) + } +} + +// TestS3EncodeRoundTripsLeafDataCollision verifies that a bucket +// holding both .elastickv-leaf-data (the renamed shorter key) +// and /sub (the longer key) is encoded such that the emitted +// !s3|obj|head|| records are keyed by the recovered +// original S3 keys (no .elastickv-leaf-data suffix leak), and the +// longer-key record stays under its unchanged path. +func TestS3EncodeRoundTripsLeafDataCollision(t *testing.T) { + t.Parallel() + in := t.TempDir() + const bucket = "b" + const ( + shorterKey = "path/to" + longerKey = "path/to/sub" + ) + writeS3Bucket(t, in, bucket, []byte(`{"format_version":1,"name":"b"}`)) + shorterBody := []byte("shorter-key body") + writeS3LeafDataRename(t, in, bucket, shorterKey, shorterBody, + s3ObjectSidecar("etag-shorter", int64(len(shorterBody)), "text/plain", "")) + longerBody := []byte("longer-key subobject body") + writeS3Object(t, in, bucket, longerKey, longerBody, + s3ObjectSidecar("etag-longer", int64(len(longerBody)), "text/plain", "")) + writeS3KeymapTracker(t, in, []KeymapRecord{ + leafRecord(shorterKey+S3LeafDataSuffix, shorterKey), + }) + + fsm := encodeS3Tree(t, in) + assertS3ManifestEmittedUnder(t, fsm, shorterKey, shorterKey+S3LeafDataSuffix) + assertS3ManifestEmittedUnder(t, fsm, longerKey, "") +} + +// TestS3EncodeRoundTripsMetaCollision verifies that a meta-suffix +// rename (the decoder's --rename-collisions=true variant for a user +// object whose key naturally ends in .elastickv-meta.json) is +// reversed: the keymap-recorded original key is recovered. Asserts +// at the emitted-record level since a full decoder round-trip would +// reject the .elastickv-meta.json-suffixed key unless invoked with +// --rename-collisions=true. +func TestS3EncodeRoundTripsMetaCollision(t *testing.T) { + t.Parallel() + in := t.TempDir() + const bucket = "b" + const ( + renamedKey = "report.user-data" + originalKey = "report" + S3MetaSuffixReserved + ) + writeS3Bucket(t, in, bucket, []byte(`{"format_version":1,"name":"b"}`)) + body := []byte("user object whose key happens to look like a sidecar") + writeS3MetaCollisionRename(t, in, bucket, renamedKey, body, + s3ObjectSidecar("etag-meta", int64(len(body)), "application/json", "")) + writeS3KeymapTracker(t, in, []KeymapRecord{ + metaRecord(renamedKey, originalKey), + }) + + fsm := encodeS3Tree(t, in) + assertS3ManifestEmittedUnder(t, fsm, originalKey, renamedKey) +} + +// TestS3EncodeRejectsKeymapTargetingReservedSubtree pins gate +// "keymap original key cannot target _orphans or _incomplete_uploads" +// at the encoder integration level (the unit version is in +// encode_s3_collision_test.go). +func TestS3EncodeRejectsKeymapTargetingReservedSubtree(t *testing.T) { + t.Parallel() + for _, reserved := range []string{"_orphans", "_incomplete_uploads"} { + t.Run(reserved, func(t *testing.T) { + t.Parallel() + in := t.TempDir() + const bucket = "b" + writeS3Bucket(t, in, bucket, []byte(`{"format_version":1,"name":"b"}`)) + writeS3Object(t, in, bucket, "renamed-body", + []byte("body"), s3ObjectSidecar("e", 4, "text/plain", "")) + writeS3KeymapTracker(t, in, []KeymapRecord{ + leafRecord("renamed-body", reserved+"/forbidden/key"), + }) + b := newSnapshotBuilder(s3EncTS) + err := NewS3RecordEncoder(in).Encode(b) + if !errors.Is(err, ErrS3EncodeInvalidBucket) { + t.Fatalf("Encode err = %v, want wrap of ErrS3EncodeInvalidBucket", err) + } + }) + } +} + +// TestS3EncodeAcceptsKeymapWithUserUnderscoreKey pins that codex P2 +// v913 v1's narrow-reserved-prefix-rule allows a legitimate user +// key whose first segment starts with _ but is NOT one of the two +// named reserved subtrees (_orphans / _incomplete_uploads). +func TestS3EncodeAcceptsKeymapWithUserUnderscoreKey(t *testing.T) { + t.Parallel() + in := t.TempDir() + const bucket = "b" + const ( + renamedDisk = "renamed-underscore-key" + originalKey = "_foo/legitimate-user-key" + ) + writeS3Bucket(t, in, bucket, []byte(`{"format_version":1,"name":"b"}`)) + body := []byte("user object under a single underscore prefix") + writeS3Object(t, in, bucket, renamedDisk, body, + s3ObjectSidecar("etag-u", int64(len(body)), "text/plain", "")) + writeS3KeymapTracker(t, in, []KeymapRecord{ + // Use KindMetaCollision so the encoded value bypasses the + // .elastickv-leaf-data suffix check. The point of this test + // is the reserved-root validation, not the suffix check. + // metaRecord requires the original to end in + // .elastickv-meta.json - use a Kind=SHAFallback record with + // an arbitrary encoded value to isolate the reserved-root + // gate from per-Kind suffix gates. + { + Encoded: renamedDisk, + OriginalB64: base64.RawURLEncoding.EncodeToString([]byte(originalKey)), + Kind: KindSHAFallback, + }, + }) + + fsm := encodeS3Tree(t, in) + assertS3ManifestEmittedUnder(t, fsm, originalKey, renamedDisk) +} + +// TestS3EncodeRejectsOrphanKeymapEntry pins fail-closed at the +// integration level when KEYMAP.jsonl references an encoded segment +// that has no on-disk file (e.g. the operator removed the renamed +// body but kept the keymap). +func TestS3EncodeRejectsOrphanKeymapEntry(t *testing.T) { + t.Parallel() + in := t.TempDir() + const bucket = "b" + writeS3Bucket(t, in, bucket, []byte(`{"format_version":1,"name":"b"}`)) + writeS3KeymapTracker(t, in, []KeymapRecord{ + leafRecord("ghost"+S3LeafDataSuffix, "ghost"), + }) + b := newSnapshotBuilder(s3EncTS) + err := NewS3RecordEncoder(in).Encode(b) + if !errors.Is(err, ErrS3EncodeInvalidBucket) { + t.Fatalf("Encode err = %v, want wrap of ErrS3EncodeInvalidBucket", err) + } +} + +// TestS3EncodeRejectsDuplicateKeymapEntry pins fail-closed at the +// integration level when KEYMAP.jsonl has two records with the same +// Encoded value (S3 decoder writes one record per rename — a dup +// means a corrupt dump). Loader-level coverage is in +// TestLoadBucketKeymap_DuplicateEncoded. +func TestS3EncodeRejectsDuplicateKeymapEntry(t *testing.T) { + t.Parallel() + in := t.TempDir() + const bucket = "b" + writeS3Bucket(t, in, bucket, []byte(`{"format_version":1,"name":"b"}`)) + writeS3Object(t, in, bucket, "x"+S3LeafDataSuffix, + []byte("body"), s3ObjectSidecar("e", 4, "text/plain", "")) + writeS3KeymapTracker(t, in, []KeymapRecord{ + leafRecord("x"+S3LeafDataSuffix, "x"), + leafRecord("x"+S3LeafDataSuffix, "elsewhere"), + }) + b := newSnapshotBuilder(s3EncTS) + err := NewS3RecordEncoder(in).Encode(b) + if !errors.Is(err, ErrS3EncodeInvalidBucket) { + t.Fatalf("Encode err = %v, want wrap of ErrS3EncodeInvalidBucket", err) + } +} + +// TestS3EncodeRejectsMalformedKeymapJSON pins fail-closed at the +// integration level when KEYMAP.jsonl has an unparseable line. +func TestS3EncodeRejectsMalformedKeymapJSON(t *testing.T) { + t.Parallel() + in := t.TempDir() + const bucket = "b" + writeS3Bucket(t, in, bucket, []byte(`{"format_version":1,"name":"b"}`)) + bucketDir := filepath.Join(in, "s3", EncodeSegment([]byte(bucket))) + if err := os.WriteFile(filepath.Join(bucketDir, "KEYMAP.jsonl"), + []byte("not-json\n"), 0o600); err != nil { + t.Fatalf("write KEYMAP.jsonl: %v", err) + } + b := newSnapshotBuilder(s3EncTS) + err := NewS3RecordEncoder(in).Encode(b) + if !errors.Is(err, ErrS3EncodeInvalidBucket) { + t.Fatalf("Encode err = %v, want wrap of ErrS3EncodeInvalidBucket", err) + } +} + +// TestS3EncodeMissingKeymapIsValidNoCollisionDump pins that the +// M4-2a happy path is unchanged: a bucket without KEYMAP.jsonl +// continues to encode every object at its on-disk path. +func TestS3EncodeMissingKeymapIsValidNoCollisionDump(t *testing.T) { + t.Parallel() + in := t.TempDir() + const bucket = "b" + writeS3Bucket(t, in, bucket, []byte(`{"format_version":1,"name":"b"}`)) + body := []byte("body without any keymap") + writeS3Object(t, in, bucket, "plain/key", body, + s3ObjectSidecar("etag-plain", int64(len(body)), "text/plain", "")) + gotBody, _ := decodeS3Object(t, encodeS3Tree(t, in), bucket, "plain/key") + if !bytes.Equal(gotBody, body) { + t.Fatalf("no-keymap object body = %q, want %q", gotBody, body) + } +} diff --git a/internal/backup/encode_s3_collision_test.go b/internal/backup/encode_s3_collision_test.go index 62008d26..6e053ec1 100644 --- a/internal/backup/encode_s3_collision_test.go +++ b/internal/backup/encode_s3_collision_test.go @@ -13,9 +13,11 @@ import ( // writeS3KeymapTracker writes /s3//KEYMAP.jsonl // with the given records and NO companion .elastickv-meta.json // sidecar, so isKeymapCollisionTracker classifies it as a tracker. -func writeS3KeymapTracker(t *testing.T, root, bucket string, records []KeymapRecord) { +// Bucket is taken from the shared collisionTestBucket constant since +// every commit-A and commit-C fixture uses the same name. +func writeS3KeymapTracker(t *testing.T, root string, records []KeymapRecord) { t.Helper() - bucketDir := filepath.Join(root, "s3", EncodeSegment([]byte(bucket))) + bucketDir := filepath.Join(root, "s3", EncodeSegment([]byte(collisionTestBucket))) if err := os.MkdirAll(bucketDir, 0o755); err != nil { t.Fatalf("MkdirAll: %v", err) } @@ -71,7 +73,6 @@ func metaRecord(encoded, originalSlashKey string) KeymapRecord { func TestLoadBucketKeymap_HappyPath(t *testing.T) { t.Parallel() in := t.TempDir() - const bucket = "b" records := []KeymapRecord{ leafRecord("path/to"+S3LeafDataSuffix, "path/to"), metaRecord("renamed.user-data", "real"+S3MetaSuffixReserved), @@ -81,7 +82,7 @@ func TestLoadBucketKeymap_HappyPath(t *testing.T) { Kind: KindSHAFallback, }, } - writeS3KeymapTracker(t, in, bucket, records) + writeS3KeymapTracker(t, in, records) got, err := loadKeymapForBucketB(t, in) if err != nil { @@ -107,7 +108,7 @@ func TestLoadBucketKeymap_HappyPath(t *testing.T) { func TestLoadBucketKeymap_DuplicateEncoded(t *testing.T) { t.Parallel() in := t.TempDir() - writeS3KeymapTracker(t, in, "b", []KeymapRecord{ + writeS3KeymapTracker(t, in, []KeymapRecord{ leafRecord("path/to"+S3LeafDataSuffix, "path/to"), leafRecord("path/to"+S3LeafDataSuffix, "elsewhere"), }) From f033e469c52fe0d4924f11212a3d35200770f0f7 Mon Sep 17 00:00:00 2001 From: "Yoshiaki Ueda (bootjp)" Date: Thu, 4 Jun 2026 22:33:53 +0900 Subject: [PATCH 4/4] backup: M4-2b address #928 review - path traversal + empty original + regular-file sidecar (gemini medium x2 + codex P2) Three findings on PR #928: 1. Gemini medium (encode_s3_collision.go:136): path-traversal guard. rec.Encoded values containing '..', '.', or empty segments would let a hand-crafted keymap escape the bucket sub-root when filepath.Join joins Encoded into the Lstat path. New validateKeymapEncodedPath rejects any non-clean relative path. Pinned by TestValidateKeymapRecord_PathTraversal (6 boundary cases: leading slash, trailing slash, double slash, contains '..' / '.' / middle '..'). 2. Gemini medium (encode_s3_collision.go:143): non-empty original. S3 object keys cannot be empty. Reject the keymap record at load time so the operator sees the problem directly rather than as a downstream object-body assembly error. Pinned by TestValidateKeymapRecord_EmptyOriginal. 3. Codex P2 (encode_s3_objects.go:94): regular-file sidecar. isKeymapCollisionTracker used rootEntryExists to look for a companion sidecar at /KEYMAP.jsonl.elastickv-meta.json, but rootEntryExists returns true for any Lstat-able entry including a directory. A user key prefix 'KEYMAP.jsonl.elastickv-meta.json/...' creates a directory at the sidecar path, so the tracker was mis-classified as a user object and keymap loading was skipped. Fix: require the sidecar to be a regular file. Only a real file disambiguates a legitimate user object; any other mode means the tracker classification stands. Pinned by TestS3EncodeTrackerWithDirectorySidecar. Caller audit: - validateKeymapRecord: split into validateKeymapEncodedPath + validateKeymapKindSuffix to keep cyclop under 10. Only callers are loadBucketKeymap (production) + test sites. - rootEntryExists: had only one caller (isKeymapCollisionTracker) which now inlines the Lstat + IsRegular check; the helper was removed. - isKeymapCollisionTracker: same single call site (encodeBucketObjects). Behavior tightened only for the dir-at- sidecar-path edge case; happy path unchanged. go test + golangci-lint clean on ./internal/backup/. --- internal/backup/encode_s3_collision.go | 34 +++++++++++++ .../encode_s3_collision_roundtrip_test.go | 36 ++++++++++++++ internal/backup/encode_s3_collision_test.go | 49 +++++++++++++++++++ internal/backup/encode_s3_objects.go | 34 +++++++------ 4 files changed, 139 insertions(+), 14 deletions(-) diff --git a/internal/backup/encode_s3_collision.go b/internal/backup/encode_s3_collision.go index 5cda2111..dbf487ee 100644 --- a/internal/backup/encode_s3_collision.go +++ b/internal/backup/encode_s3_collision.go @@ -133,14 +133,48 @@ func validateKeymapRecord(rec KeymapRecord) error { return errors.Wrapf(ErrInvalidKeymapRecord, "unknown kind %q for encoded segment %q", rec.Kind, rec.Encoded) } + if err := validateKeymapEncodedPath(rec.Encoded); err != nil { + return err + } originalBytes, err := rec.Original() if err != nil { return err } original := string(originalBytes) + // S3 object keys cannot be empty; reject an empty decoded + // original here so the operator sees the keymap problem at load + // time rather than at object-body assembly. Gemini medium PR #928. + if original == "" { + return errors.Wrapf(ErrInvalidKeymapRecord, + "empty original key for encoded segment %q (kind=%q)", rec.Encoded, rec.Kind) + } if err := validateKeymapReservedRoot(rec, original); err != nil { return err } + return validateKeymapKindSuffix(rec, original) +} + +// validateKeymapEncodedPath rejects rec.Encoded values that would +// escape the bucket sub-root or carry empty segments. The lookup +// site (resolveObjectKeyFromRel) joins this value under bucketDir +// when calling root.Lstat, and ".." would silently break out of the +// bucket. Gemini medium PR #928. +func validateKeymapEncodedPath(encoded string) error { + for _, seg := range strings.Split(encoded, "/") { + if seg == "" || seg == "." || seg == ".." { + return errors.Wrapf(ErrInvalidKeymapRecord, + "encoded path %q contains invalid segment %q", encoded, seg) + } + } + return nil +} + +// validateKeymapKindSuffix enforces the Kind-specific suffix +// invariants: KindS3LeafData encoded values must end in +// S3LeafDataSuffix; KindMetaCollision original values must end in +// S3MetaSuffixReserved; KindSHAFallback has no extra invariant +// today (forward-compat). +func validateKeymapKindSuffix(rec KeymapRecord, original string) error { switch rec.Kind { case KindS3LeafData: if !strings.HasSuffix(rec.Encoded, S3LeafDataSuffix) { diff --git a/internal/backup/encode_s3_collision_roundtrip_test.go b/internal/backup/encode_s3_collision_roundtrip_test.go index 35be2f59..b5685a12 100644 --- a/internal/backup/encode_s3_collision_roundtrip_test.go +++ b/internal/backup/encode_s3_collision_roundtrip_test.go @@ -248,6 +248,42 @@ func TestS3EncodeRejectsMalformedKeymapJSON(t *testing.T) { } } +// TestS3EncodeTrackerWithDirectorySidecar pins codex P2 PR #928. A +// user key prefix that happens to be `KEYMAP.jsonl.elastickv-meta.json/` +// creates a directory at the sidecar path. Earlier code treated ANY +// entry at the sidecar path as "the sidecar exists" and skipped the +// keymap-loading branch, mis-classifying the tracker file as a user +// object. The fix requires the sidecar to be a regular file before +// the tracker can be bypassed. +func TestS3EncodeTrackerWithDirectorySidecar(t *testing.T) { + t.Parallel() + in := t.TempDir() + writeS3Bucket(t, in, collisionTestBucket, []byte(`{"format_version":1,"name":"b"}`)) + bucketDir := filepath.Join(in, "s3", EncodeSegment([]byte(collisionTestBucket))) + // Tracker file (no companion regular-file sidecar). + writeS3KeymapTracker(t, in, []KeymapRecord{ + leafRecord("path/to"+S3LeafDataSuffix, "path/to"), + }) + // User-key subtree whose prefix happens to be the sidecar name. + // This creates a DIRECTORY at /KEYMAP.jsonl.elastickv-meta.json/. + if err := os.MkdirAll(filepath.Join(bucketDir, "KEYMAP.jsonl"+S3MetaSuffixReserved), 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + // The renamed leaf-data body for the tracker entry. + body := []byte("shorter-key body") + writeS3LeafDataRename(t, in, collisionTestBucket, "path/to", body, + s3ObjectSidecar("etag-1", int64(len(body)), "text/plain", "")) + // path/to/sub to make the leaf-data rename plausible. + longerBody := []byte("longer-key subobject body") + writeS3Object(t, in, collisionTestBucket, "path/to/sub", longerBody, + s3ObjectSidecar("etag-2", int64(len(longerBody)), "text/plain", "")) + + fsm := encodeS3Tree(t, in) + // The tracker classification stood despite the directory at the + // sidecar path, so the keymap loaded and "path/to" recovered. + assertS3ManifestEmittedUnder(t, fsm, "path/to", "path/to"+S3LeafDataSuffix) +} + // TestS3EncodeMissingKeymapIsValidNoCollisionDump pins that the // M4-2a happy path is unchanged: a bucket without KEYMAP.jsonl // continues to encode every object at its on-disk path. diff --git a/internal/backup/encode_s3_collision_test.go b/internal/backup/encode_s3_collision_test.go index 6e053ec1..6dcc40b3 100644 --- a/internal/backup/encode_s3_collision_test.go +++ b/internal/backup/encode_s3_collision_test.go @@ -204,6 +204,55 @@ func TestValidateKeymapRecord_KindMetaSuffix(t *testing.T) { } } +// TestValidateKeymapRecord_PathTraversal pins gemini medium PR #928: +// rec.Encoded must be a clean relative path. `..`, `.`, and empty +// segments would let a hand-crafted dump escape the bucket +// sub-root when filepath.Join joins encoded into the Lstat path. +func TestValidateKeymapRecord_PathTraversal(t *testing.T) { + t.Parallel() + cases := []struct { + name string + encoded string + }{ + {"contains ..", "../escape" + S3LeafDataSuffix}, + {"contains .", "./still-bad" + S3LeafDataSuffix}, + {"double slash (empty segment)", "path//to" + S3LeafDataSuffix}, + {"trailing slash (empty segment)", "path/to" + S3LeafDataSuffix + "/"}, + {"leading slash (empty segment)", "/path/to" + S3LeafDataSuffix}, + {"middle ..", "path/../escape" + S3LeafDataSuffix}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + t.Parallel() + rec := leafRecord(c.encoded, "any/orig") + err := validateKeymapRecord(rec) + if !errors.Is(err, ErrInvalidKeymapRecord) { + t.Fatalf("err = %v, want ErrInvalidKeymapRecord", err) + } + }) + } +} + +// TestValidateKeymapRecord_EmptyOriginal pins gemini medium PR #928: +// an S3 object key cannot be empty. Reject the keymap record at +// load time rather than letting it propagate to object-body +// assembly where the diagnostic is harder to read. +func TestValidateKeymapRecord_EmptyOriginal(t *testing.T) { + t.Parallel() + rec := KeymapRecord{ + Encoded: "any" + S3LeafDataSuffix, + OriginalB64: base64.RawURLEncoding.EncodeToString([]byte("")), + Kind: KindS3LeafData, + } + err := validateKeymapRecord(rec) + if !errors.Is(err, ErrInvalidKeymapRecord) { + t.Fatalf("err = %v, want ErrInvalidKeymapRecord", err) + } + if !strings.Contains(err.Error(), "empty original") { + t.Fatalf("err message = %v, want empty-original message", err) + } +} + // TestValidateKeymapRecord_UnknownKind pins the forward-compat // guard: an unrecognized Kind value fails closed rather than // silently passing. diff --git a/internal/backup/encode_s3_objects.go b/internal/backup/encode_s3_objects.go index a4b9c582..56964112 100644 --- a/internal/backup/encode_s3_objects.go +++ b/internal/backup/encode_s3_objects.go @@ -105,9 +105,17 @@ func (e *S3RecordEncoder) encodeBucketObjects(b *snapshotBuilder, root *os.Root, // isKeymapCollisionTracker reports whether the bucket's top-level // KEYMAP.jsonl is the decoder's collision-rename tracker. The tracker is a // regular FILE with no companion sidecar. It is NOT: -// - a user object literally named KEYMAP.jsonl (which has a sidecar), or +// - a user object literally named KEYMAP.jsonl (which has a regular- +// file sidecar at KEYMAP.jsonl.elastickv-meta.json), or // - a directory holding objects under the "KEYMAP.jsonl/" key prefix // (a directory, not a file). +// +// The sidecar disambiguation requires the sidecar to be a REGULAR FILE +// (not just any entry). A user object whose key prefix happens to be +// `KEYMAP.jsonl.elastickv-meta.json/` would create a directory at that +// path; treating that directory as "the sidecar" would mis-classify +// the tracker file as a user object and skip keymap loading. Codex P2 +// PR #928. func (e *S3RecordEncoder) isKeymapCollisionTracker(root *os.Root, bucketDir string) (bool, error) { keymapRel := filepath.Join(bucketDir, "KEYMAP.jsonl") linfo, err := root.Lstat(keymapRel) @@ -120,24 +128,22 @@ func (e *S3RecordEncoder) isKeymapCollisionTracker(root *os.Root, bucketDir stri if !linfo.Mode().IsRegular() { return false, nil // a directory (KEYMAP.jsonl/ key prefix) or other } - hasSidecar, err := rootEntryExists(root, keymapRel+S3MetaSuffixReserved) - if err != nil { - return false, err - } - return !hasSidecar, nil -} - -// rootEntryExists reports whether rel exists within root (via Lstat, so it -// does not follow a final symlink). -func rootEntryExists(root *os.Root, rel string) (bool, error) { - _, err := root.Lstat(rel) + sinfo, err := root.Lstat(keymapRel + S3MetaSuffixReserved) if errors.Is(err, os.ErrNotExist) { - return false, nil + // No sidecar at all → this KEYMAP.jsonl is the tracker. + return true, nil } if err != nil { return false, errors.WithStack(err) } - return true, nil + // Sidecar entry exists, but only a regular file disambiguates a + // legitimate user object. A directory at the sidecar path means + // some user key prefix collides with `KEYMAP.jsonl.elastickv-meta.json/` + // — that is NOT a sidecar; the tracker classification stands. + if !sinfo.Mode().IsRegular() { + return true, nil + } + return false, nil } // reservedDirHoldsSidecar reports whether dirRel (a reserved dump dir like