diff --git a/cmd/entire/cli/checkpoint/committed.go b/cmd/entire/cli/checkpoint/committed.go index 7001d6d7f..f2b6633f7 100644 --- a/cmd/entire/cli/checkpoint/committed.go +++ b/cmd/entire/cli/checkpoint/committed.go @@ -350,7 +350,7 @@ func (s *GitStore) writeSessionToSubdirectory(ctx context.Context, opts WriteCom // Write prompts if len(opts.Prompts) > 0 { - promptContent := redact.String(strings.Join(opts.Prompts, "\n\n---\n\n")) + promptContent := redact.String(JoinPrompts(opts.Prompts)) blobHash, err := CreateBlobFromContent(s.repo, []byte(promptContent)) if err != nil { return filePaths, err @@ -1281,7 +1281,7 @@ func (s *GitStore) UpdateCommitted(ctx context.Context, opts UpdateCommittedOpti // Replace prompts (apply redaction as safety net) if len(opts.Prompts) > 0 { - promptContent := redact.String(strings.Join(opts.Prompts, "\n\n---\n\n")) + promptContent := redact.String(JoinPrompts(opts.Prompts)) blobHash, err := CreateBlobFromContent(s.repo, []byte(promptContent)) if err != nil { return fmt.Errorf("failed to create prompt blob: %w", err) diff --git a/cmd/entire/cli/checkpoint/prompts.go b/cmd/entire/cli/checkpoint/prompts.go new file mode 100644 index 000000000..fc8d26d35 --- /dev/null +++ b/cmd/entire/cli/checkpoint/prompts.go @@ -0,0 +1,25 @@ +package checkpoint + +import "strings" + +// PromptSeparator is the canonical separator used in prompt.txt when multiple +// prompts are stored in a single file. +const PromptSeparator = "\n\n---\n\n" + +// JoinPrompts serializes prompts to prompt.txt format. +func JoinPrompts(prompts []string) string { + return strings.Join(prompts, PromptSeparator) +} + +// SplitPromptContent deserializes prompt.txt content into individual prompts. +func SplitPromptContent(content string) []string { + if content == "" { + return nil + } + + prompts := strings.Split(content, PromptSeparator) + for len(prompts) > 0 && prompts[len(prompts)-1] == "" { + prompts = prompts[:len(prompts)-1] + } + return prompts +} diff --git a/cmd/entire/cli/checkpoint/prompts_test.go b/cmd/entire/cli/checkpoint/prompts_test.go new file mode 100644 index 000000000..4b1119625 --- /dev/null +++ b/cmd/entire/cli/checkpoint/prompts_test.go @@ -0,0 +1,29 @@ +package checkpoint + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestJoinAndSplitPrompts_RoundTrip(t *testing.T) { + t.Parallel() + + original := []string{ + "first line\nwith newline", + "second prompt", + } + + joined := JoinPrompts(original) + split := SplitPromptContent(joined) + + require.Len(t, split, 2) + assert.Equal(t, original, split) +} + +func TestSplitPromptContent_EmptyContent(t *testing.T) { + t.Parallel() + + assert.Nil(t, SplitPromptContent("")) +} diff --git a/cmd/entire/cli/checkpoint/v2_committed.go b/cmd/entire/cli/checkpoint/v2_committed.go index 823504462..85250c073 100644 --- a/cmd/entire/cli/checkpoint/v2_committed.go +++ b/cmd/entire/cli/checkpoint/v2_committed.go @@ -121,7 +121,7 @@ func (s *V2GitStore) updateCommittedMain(ctx context.Context, opts UpdateCommitt sessionPath := fmt.Sprintf("%s%d/", basePath, sessionIndex) if len(opts.Prompts) > 0 { - promptContent := redact.String(strings.Join(opts.Prompts, "\n\n---\n\n")) + promptContent := redact.String(JoinPrompts(opts.Prompts)) blobHash, err := CreateBlobFromContent(s.repo, []byte(promptContent)) if err != nil { return 0, fmt.Errorf("failed to create prompt blob: %w", err) @@ -334,7 +334,7 @@ func (s *V2GitStore) writeMainSessionToSubdirectory(opts WriteCommittedOptions, // Write prompts if len(opts.Prompts) > 0 { - promptContent := redact.String(strings.Join(opts.Prompts, "\n\n---\n\n")) + promptContent := redact.String(JoinPrompts(opts.Prompts)) blobHash, err := CreateBlobFromContent(s.repo, []byte(promptContent)) if err != nil { return filePaths, err diff --git a/cmd/entire/cli/migrate.go b/cmd/entire/cli/migrate.go new file mode 100644 index 000000000..38e84ecac --- /dev/null +++ b/cmd/entire/cli/migrate.go @@ -0,0 +1,555 @@ +package cli + +import ( + "context" + "errors" + "fmt" + "io" + "log/slog" + "strconv" + "strings" + + "github.com/entireio/cli/cmd/entire/cli/checkpoint" + "github.com/entireio/cli/cmd/entire/cli/checkpoint/id" + "github.com/entireio/cli/cmd/entire/cli/logging" + "github.com/entireio/cli/cmd/entire/cli/paths" + "github.com/entireio/cli/cmd/entire/cli/strategy" + "github.com/entireio/cli/cmd/entire/cli/transcript/compact" + "github.com/entireio/cli/cmd/entire/cli/versioninfo" + "github.com/go-git/go-git/v6" + "github.com/go-git/go-git/v6/plumbing" + "github.com/go-git/go-git/v6/plumbing/object" + "github.com/spf13/cobra" +) + +func newMigrateCmd() *cobra.Command { + var checkpointsFlag string + + cmd := &cobra.Command{ + Use: "migrate", + Short: "Migrate Entire data to newer formats", + Long: `Migrate Entire data to newer formats. Currently supports migrating v1 checkpoints to v2.`, + Hidden: true, + RunE: func(cmd *cobra.Command, _ []string) error { + if checkpointsFlag == "" { + return cmd.Help() + } + if checkpointsFlag != "v2" { + return fmt.Errorf("unsupported checkpoints version: %q (only \"v2\" is supported)", checkpointsFlag) + } + + ctx := cmd.Context() + + if _, err := paths.WorktreeRoot(ctx); err != nil { + cmd.SilenceUsage = true + fmt.Fprintln(cmd.ErrOrStderr(), "Not a git repository. Please run from within a git repository.") + return NewSilentError(errors.New("not a git repository")) + } + + logging.SetLogLevelGetter(GetLogLevel) + if initErr := logging.Init(ctx, ""); initErr != nil { + fmt.Fprintf(cmd.ErrOrStderr(), "Warning: could not initialize logging: %v\n", initErr) + } else { + defer logging.Close() + } + return runMigrateCheckpointsV2(ctx, cmd) + }, + } + + cmd.Flags().StringVar(&checkpointsFlag, "checkpoints", "", "Target checkpoint format version (e.g., \"v2\")") + + return cmd +} + +type migrateResult struct { + migrated int + skipped int + failed int +} + +func runMigrateCheckpointsV2(ctx context.Context, cmd *cobra.Command) error { + repo, err := strategy.OpenRepository(ctx) + if err != nil { + cmd.SilenceUsage = true + fmt.Fprintln(cmd.ErrOrStderr(), "Not a git repository. Please run from within a git repository.") + return NewSilentError(err) + } + + v1Store := checkpoint.NewGitStore(repo) + v2Store := checkpoint.NewV2GitStore(repo, migrateRemoteName) + out := cmd.OutOrStdout() + + result, err := migrateCheckpointsV2(ctx, repo, v1Store, v2Store, out) + if err != nil { + return err + } + + fmt.Fprintf(out, "\nMigration complete: %d migrated, %d skipped, %d failed\n", + result.migrated, result.skipped, result.failed) + + if result.failed > 0 { + fmt.Fprintf(out, "%d checkpoint(s) failed to migrate. Check .entire/logs/ for details.\n", result.failed) + return NewSilentError(fmt.Errorf("%d checkpoint(s) failed to migrate", result.failed)) + } + + return nil +} + +var ( + errAlreadyMigrated = errors.New("already migrated") + errTranscriptNotGeneratable = errors.New("transcript.jsonl could not be generated") +) + +const migrateRemoteName = "origin" + +func migrateCheckpointsV2(ctx context.Context, repo *git.Repository, v1Store *checkpoint.GitStore, v2Store *checkpoint.V2GitStore, out io.Writer) (*migrateResult, error) { + v1List, err := v1Store.ListCommitted(ctx) + if err != nil { + return nil, fmt.Errorf("failed to list v1 checkpoints: %w", err) + } + + if len(v1List) == 0 { + fmt.Fprintln(out, "Nothing to migrate: no v1 checkpoints found") + return &migrateResult{}, nil + } + + fmt.Fprintln(out, "Migrating v1 checkpoints to v2...") + total := len(v1List) + result := &migrateResult{} + + for i, info := range v1List { + prefix := fmt.Sprintf(" [%d/%d] Migrating checkpoint %s...", i+1, total, info.CheckpointID) + + if migrateErr := migrateOneCheckpoint(ctx, repo, v1Store, v2Store, info, out, prefix); migrateErr != nil { + switch { + case errors.Is(migrateErr, errAlreadyMigrated): + fmt.Fprintf(out, "%s skipped (already in v2)\n", prefix) + result.skipped++ + case errors.Is(migrateErr, errTranscriptNotGeneratable): + fmt.Fprintf(out, "%s in v2, but %s\n", prefix, migrateErr.Error()) + result.skipped++ + default: + fmt.Fprintf(out, "%s failed\n", prefix) + logging.Error(ctx, "checkpoint migration failed", + slog.String("checkpoint_id", string(info.CheckpointID)), + slog.String("error", migrateErr.Error()), + ) + result.failed++ + } + continue + } + + result.migrated++ + } + + return result, nil +} + +func migrateOneCheckpoint(ctx context.Context, repo *git.Repository, v1Store *checkpoint.GitStore, v2Store *checkpoint.V2GitStore, info checkpoint.CommittedInfo, out io.Writer, prefix string) error { + existing, err := v2Store.ReadCommitted(ctx, info.CheckpointID) + if err != nil { + return fmt.Errorf("failed to check v2 for checkpoint %s: %w", info.CheckpointID, err) + } + + // Already in v2 — check if any aspect of sessions are missing and backfill + if existing != nil { + repaired, repairErr := repairPartialV2Checkpoint(ctx, repo, v1Store, v2Store, info, existing) + if repairErr != nil { + return repairErr + } + + currentV2, readCurrentErr := v2Store.ReadCommitted(ctx, info.CheckpointID) + if readCurrentErr != nil { + return fmt.Errorf("failed to re-read v2 checkpoint %s: %w", info.CheckpointID, readCurrentErr) + } + if currentV2 == nil { + return fmt.Errorf("v2 checkpoint %s disappeared during migration", info.CheckpointID) + } + + backfillErr := backfillCompactTranscripts(ctx, v1Store, v2Store, info, currentV2, out, prefix) + if errors.Is(backfillErr, errAlreadyMigrated) && repaired { + fmt.Fprintf(out, "%s repaired partial v2 checkpoint state\n", prefix) + return nil + } + if errors.Is(backfillErr, errTranscriptNotGeneratable) && repaired { + fmt.Fprintf(out, "%s repaired partial v2 checkpoint state (compact transcript not generated)\n", prefix) + return nil + } + return backfillErr + } + + summary, err := v1Store.ReadCommitted(ctx, info.CheckpointID) + if err != nil { + return fmt.Errorf("failed to read v1 summary: %w", err) + } + if summary == nil { + return fmt.Errorf("v1 checkpoint %s has no summary", info.CheckpointID) + } + + compactFailed := false + shouldCopyTaskMetadata := false + + for sessionIdx := range len(summary.Sessions) { + content, readErr := v1Store.ReadSessionContent(ctx, info.CheckpointID, sessionIdx) + if readErr != nil { + return fmt.Errorf("failed to read v1 session %d: %w", sessionIdx, readErr) + } + if content.Metadata.IsTask { + shouldCopyTaskMetadata = true + } + + opts := buildMigrateWriteOpts(content, info) + + compacted := tryCompactTranscript(ctx, content.Transcript, content.Metadata) + if compacted != nil { + opts.CompactTranscript = compacted + } else if len(content.Transcript) > 0 { + compactFailed = true + } + + if writeErr := v2Store.WriteCommitted(ctx, opts); writeErr != nil { + return fmt.Errorf("failed to write v2 session %d: %w", sessionIdx, writeErr) + } + } + + // Copy task metadata trees from v1 to v2 /full/current + if shouldCopyTaskMetadata { + if taskErr := copyTaskMetadataToV2(repo, v1Store, v2Store, info.CheckpointID, summary); taskErr != nil { + logging.Warn(ctx, "failed to copy task metadata to v2", + slog.String("checkpoint_id", string(info.CheckpointID)), + slog.String("error", taskErr.Error()), + ) + } + } + + if compactFailed { + fmt.Fprintf(out, "%s done (compact transcript not generated)\n", prefix) + } else { + fmt.Fprintf(out, "%s done\n", prefix) + } + + return nil +} + +func repairPartialV2Checkpoint(ctx context.Context, repo *git.Repository, v1Store *checkpoint.GitStore, v2Store *checkpoint.V2GitStore, info checkpoint.CommittedInfo, v2Summary *checkpoint.CheckpointSummary) (bool, error) { + repaired := false + + // Spot-check already present sessions: ensure required /full/current artifacts exist. + existingSessionCount := len(v2Summary.Sessions) + for sessionIdx := range existingSessionCount { + ok, checkErr := hasCurrentFullSessionArtifacts(repo, v2Store, info.CheckpointID, sessionIdx) + if checkErr != nil { + return false, fmt.Errorf("failed to check v2 session %d artifacts: %w", sessionIdx, checkErr) + } + if ok { + continue + } + + content, readErr := v1Store.ReadSessionContent(ctx, info.CheckpointID, sessionIdx) + if readErr != nil { + return false, fmt.Errorf("failed to read v1 session %d while repairing v2: %w", sessionIdx, readErr) + } + + updateOpts := checkpoint.UpdateCommittedOptions{ + CheckpointID: info.CheckpointID, + SessionID: content.Metadata.SessionID, + Transcript: content.Transcript, + Prompts: checkpoint.SplitPromptContent(content.Prompts), + Agent: content.Metadata.Agent, + } + if compacted := tryCompactTranscript(ctx, content.Transcript, content.Metadata); compacted != nil { + updateOpts.CompactTranscript = compacted + } + + if updateErr := v2Store.UpdateCommitted(ctx, updateOpts); updateErr != nil { + return false, fmt.Errorf("failed to repair v2 session %d: %w", sessionIdx, updateErr) + } + repaired = true + } + + return repaired, nil +} + +func hasCurrentFullSessionArtifacts(repo *git.Repository, v2Store *checkpoint.V2GitStore, cpID id.CheckpointID, sessionIdx int) (bool, error) { + _, rootTreeHash, err := v2Store.GetRefState(plumbing.ReferenceName(paths.V2FullCurrentRefName)) + if err != nil { + return false, nil //nolint:nilerr // Missing /full/current ref means required artifacts are absent. + } + + rootTree, err := repo.TreeObject(rootTreeHash) + if err != nil { + return false, fmt.Errorf("failed to read /full/current tree: %w", err) + } + + sessionPath := fmt.Sprintf("%s/%d", cpID.Path(), sessionIdx) + sessionTree, err := rootTree.Tree(sessionPath) + if err != nil { + return false, nil //nolint:nilerr // Missing session path means artifacts are absent, not a hard error. + } + + hasTranscript := false + for _, entry := range sessionTree.Entries { + if entry.Name == paths.TranscriptFileName || strings.HasPrefix(entry.Name, paths.TranscriptFileName+".") { + hasTranscript = true + break + } + } + if !hasTranscript { + return false, nil + } + + if _, err := sessionTree.File(paths.ContentHashFileName); err != nil { + return false, nil //nolint:nilerr // Missing content hash indicates incomplete /full/current artifacts. + } + + return true, nil +} + +// backfillCompactTranscripts checks sessions in an already-migrated v2 checkpoint +// for missing transcript.jsonl and attempts to generate + write them from v1 data. +// Returns errAlreadyMigrated if all sessions already have compact transcripts. +func backfillCompactTranscripts(ctx context.Context, v1Store *checkpoint.GitStore, v2Store *checkpoint.V2GitStore, info checkpoint.CommittedInfo, v2Summary *checkpoint.CheckpointSummary, out io.Writer, prefix string) error { + // Find sessions missing transcript.jsonl + var needsBackfill []int + for i, session := range v2Summary.Sessions { + if session.Transcript == "" { + needsBackfill = append(needsBackfill, i) + } + } + + if len(needsBackfill) == 0 { + return errAlreadyMigrated + } + + backfilled := 0 + var lastAgent string + + for _, sessionIdx := range needsBackfill { + content, readErr := v1Store.ReadSessionContent(ctx, info.CheckpointID, sessionIdx) + if readErr != nil { + logging.Warn(ctx, "transcript.jsonl backfill: could not read v1 session", + slog.String("checkpoint_id", string(info.CheckpointID)), + slog.Int("session_index", sessionIdx), + slog.String("error", readErr.Error()), + ) + continue + } + + if content.Metadata.Agent != "" { + lastAgent = string(content.Metadata.Agent) + } + + compacted := tryCompactTranscript(ctx, content.Transcript, content.Metadata) + if compacted == nil { + // tryCompactTranscript already logs for no-agent and compact-error cases; + // log the empty-transcript case here. + if len(content.Transcript) == 0 { + logging.Warn(ctx, "transcript.jsonl backfill: empty transcript in v1", + slog.String("checkpoint_id", string(info.CheckpointID)), + slog.Int("session_index", sessionIdx), + ) + } + continue + } + + updateErr := v2Store.UpdateCommitted(ctx, checkpoint.UpdateCommittedOptions{ + CheckpointID: info.CheckpointID, + SessionID: content.Metadata.SessionID, + CompactTranscript: compacted, + }) + if updateErr != nil { + logging.Warn(ctx, "transcript.jsonl backfill: failed to write to v2", + slog.String("checkpoint_id", string(info.CheckpointID)), + slog.Int("session_index", sessionIdx), + slog.String("error", updateErr.Error()), + ) + continue + } + + backfilled++ + } + + if backfilled == 0 { + if lastAgent != "" { + return fmt.Errorf("%w: agent %q", errTranscriptNotGeneratable, lastAgent) + } + return fmt.Errorf("%w: no agent type in metadata", errTranscriptNotGeneratable) + } + + fmt.Fprintf(out, "%s added transcript.jsonl for %d session(s)\n", prefix, backfilled) + + return nil +} + +func buildMigrateWriteOpts(content *checkpoint.SessionContent, info checkpoint.CommittedInfo) checkpoint.WriteCommittedOptions { + m := content.Metadata + + prompts := checkpoint.SplitPromptContent(content.Prompts) + + return checkpoint.WriteCommittedOptions{ + CheckpointID: info.CheckpointID, + SessionID: m.SessionID, + Strategy: m.Strategy, + Branch: m.Branch, + Transcript: content.Transcript, + Prompts: prompts, + FilesTouched: m.FilesTouched, + CheckpointsCount: m.CheckpointsCount, + Agent: m.Agent, + Model: m.Model, + TurnID: m.TurnID, + TokenUsage: m.TokenUsage, + SessionMetrics: m.SessionMetrics, + InitialAttribution: m.InitialAttribution, + Summary: m.Summary, + CheckpointTranscriptStart: m.GetTranscriptStart(), + TranscriptIdentifierAtStart: m.TranscriptIdentifierAtStart, + IsTask: m.IsTask, + ToolUseID: m.ToolUseID, + AuthorName: "Entire Migration", + AuthorEmail: "migration@entire.dev", + } +} + +func tryCompactTranscript(ctx context.Context, transcript []byte, m checkpoint.CommittedMetadata) []byte { + if len(transcript) == 0 { + return nil + } + if m.Agent == "" { + logging.Warn(ctx, "compact transcript skipped: no agent type in checkpoint metadata", + slog.String("checkpoint_id", string(m.CheckpointID)), + ) + return nil + } + + compacted, err := compact.Compact(transcript, compact.MetadataFields{ + Agent: string(m.Agent), + CLIVersion: versioninfo.Version, + StartLine: m.GetTranscriptStart(), + }) + if err != nil { + logging.Warn(ctx, "compact transcript generation failed during migration", + slog.String("checkpoint_id", string(m.CheckpointID)), + slog.String("agent", string(m.Agent)), + slog.String("error", err.Error()), + ) + return nil + } + if len(compacted) == 0 { + logging.Warn(ctx, "transcript.jsonl generation produced no output", + slog.String("checkpoint_id", string(m.CheckpointID)), + slog.String("agent", string(m.Agent)), + slog.Int("input_bytes", len(transcript)), + ) + return nil + } + return compacted +} + +// copyTaskMetadataToV2 copies task metadata files (subagent transcripts, checkpoint JSONs) +// from the v1 branch to the v2 /full/current ref via tree surgery. +func copyTaskMetadataToV2(repo *git.Repository, _ *checkpoint.GitStore, v2Store *checkpoint.V2GitStore, cpID id.CheckpointID, summary *checkpoint.CheckpointSummary) error { + // Resolve the v1 branch tree + v1Tree, err := resolveV1CheckpointTree(repo, cpID) + if err != nil { + return err + } + + // Legacy v1 layout stores task metadata at checkpoint root: /tasks//... + // Prefer attaching this tree to the latest session in v2. + if rootTasksTree, rootTasksErr := v1Tree.Tree("tasks"); rootTasksErr == nil { + if len(summary.Sessions) > 0 { + latestSessionIdx := len(summary.Sessions) - 1 + if spliceErr := spliceTasksTreeToV2(repo, v2Store, cpID, latestSessionIdx, rootTasksTree.Hash); spliceErr != nil { + return fmt.Errorf("latest session task tree splice failed: %w", spliceErr) + } + } + } + + for sessionIdx := range len(summary.Sessions) { + sessionDir := strconv.Itoa(sessionIdx) + sessionTree, sessionErr := v1Tree.Tree(sessionDir) + if sessionErr != nil { + continue + } + + tasksTree, tasksErr := sessionTree.Tree("tasks") + if tasksErr != nil { + continue // No tasks directory in this session + } + + if spliceErr := spliceTasksTreeToV2(repo, v2Store, cpID, sessionIdx, tasksTree.Hash); spliceErr != nil { + return fmt.Errorf("session %d task tree splice failed: %w", sessionIdx, spliceErr) + } + } + + return nil +} + +// resolveV1CheckpointTree reads the checkpoint subtree from the v1 branch. +func resolveV1CheckpointTree(repo *git.Repository, cpID id.CheckpointID) (*object.Tree, error) { + refName := plumbing.NewBranchReferenceName(paths.MetadataBranchName) + ref, err := repo.Reference(refName, true) + if err != nil { + // Try remote tracking branch + remoteRefName := plumbing.NewRemoteReferenceName(migrateRemoteName, paths.MetadataBranchName) + ref, err = repo.Reference(remoteRefName, true) + if err != nil { + return nil, fmt.Errorf("v1 branch not found: %w", err) + } + } + + commit, err := repo.CommitObject(ref.Hash()) + if err != nil { + return nil, fmt.Errorf("failed to get v1 commit: %w", err) + } + + rootTree, err := commit.Tree() + if err != nil { + return nil, fmt.Errorf("failed to get v1 tree: %w", err) + } + + cpTree, err := rootTree.Tree(cpID.Path()) + if err != nil { + return nil, fmt.Errorf("checkpoint %s not found in v1 tree: %w", cpID, err) + } + + return cpTree, nil +} + +func spliceTasksTreeToV2(repo *git.Repository, v2Store *checkpoint.V2GitStore, cpID id.CheckpointID, sessionIdx int, tasksTreeHash plumbing.Hash) error { + refName := plumbing.ReferenceName(paths.V2FullCurrentRefName) + parentHash, rootTreeHash, err := v2Store.GetRefState(refName) + if err != nil { + return fmt.Errorf("failed to get v2 ref state: %w", err) + } + incomingTasksTree, err := repo.TreeObject(tasksTreeHash) + if err != nil { + return fmt.Errorf("failed to read tasks tree: %w", err) + } + + shardPrefix := string(cpID[:2]) + shardSuffix := string(cpID[2:]) + sessionDir := strconv.Itoa(sessionIdx) + + newRoot, err := checkpoint.UpdateSubtree(repo, rootTreeHash, + []string{shardPrefix, shardSuffix, sessionDir, "tasks"}, + incomingTasksTree.Entries, + checkpoint.UpdateSubtreeOptions{MergeMode: checkpoint.MergeKeepExisting}, + ) + if err != nil { + return fmt.Errorf("tree surgery failed: %w", err) + } + + commitHash, err := checkpoint.CreateCommit(repo, newRoot, parentHash, + fmt.Sprintf("Add task metadata for %s\n", cpID), + "Entire Migration", "migration@entire.dev") + if err != nil { + return fmt.Errorf("failed to create commit: %w", err) + } + + if err := repo.Storer.SetReference(plumbing.NewHashReference(refName, commitHash)); err != nil { + return fmt.Errorf("failed to update ref %s: %w", refName, err) + } + return nil +} diff --git a/cmd/entire/cli/migrate_test.go b/cmd/entire/cli/migrate_test.go new file mode 100644 index 000000000..6e23eef68 --- /dev/null +++ b/cmd/entire/cli/migrate_test.go @@ -0,0 +1,491 @@ +package cli + +import ( + "bytes" + "context" + "strconv" + "strings" + "testing" + + "github.com/entireio/cli/cmd/entire/cli/checkpoint" + "github.com/entireio/cli/cmd/entire/cli/checkpoint/id" + "github.com/entireio/cli/cmd/entire/cli/paths" + "github.com/entireio/cli/cmd/entire/cli/testutil" + "github.com/go-git/go-git/v6" + "github.com/go-git/go-git/v6/plumbing" + "github.com/go-git/go-git/v6/plumbing/filemode" + "github.com/go-git/go-git/v6/plumbing/object" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// initMigrateTestRepo creates a repo with an initial commit. +func initMigrateTestRepo(t *testing.T) *git.Repository { + t.Helper() + dir := t.TempDir() + testutil.InitRepo(t, dir) + testutil.WriteFile(t, dir, "README.md", "init") + testutil.GitAdd(t, dir, "README.md") + testutil.GitCommit(t, dir, "initial") + + repo, err := git.PlainOpen(dir) + require.NoError(t, err) + + return repo +} + +// writeV1Checkpoint writes a checkpoint to the v1 branch for testing. +func writeV1Checkpoint(t *testing.T, store *checkpoint.GitStore, cpID id.CheckpointID, sessionID string, transcript []byte, prompts []string) { + t.Helper() + err := store.WriteCommitted(context.Background(), checkpoint.WriteCommittedOptions{ + CheckpointID: cpID, + SessionID: sessionID, + Strategy: "manual-commit", + Transcript: transcript, + Prompts: prompts, + AuthorName: "Test", + AuthorEmail: "test@test.com", + }) + require.NoError(t, err) +} + +func newMigrateStores(repo *git.Repository) (*checkpoint.GitStore, *checkpoint.V2GitStore) { + return checkpoint.NewGitStore(repo), checkpoint.NewV2GitStore(repo, migrateRemoteName) +} + +func buildTasksTreeHash(t *testing.T, repo *git.Repository, toolUseID string) plumbing.Hash { + t.Helper() + + blobHash, err := checkpoint.CreateBlobFromContent(repo, []byte(`{"tool_use_id":"`+toolUseID+`"}`)) + require.NoError(t, err) + + treeHash, err := checkpoint.BuildTreeFromEntries(repo, map[string]object.TreeEntry{ + toolUseID + "/checkpoint.json": {Mode: filemode.Regular, Hash: blobHash}, + }) + require.NoError(t, err) + + return treeHash +} + +func TestMigrateCheckpointsV2_Basic(t *testing.T) { + t.Parallel() + repo := initMigrateTestRepo(t) + v1Store, v2Store := newMigrateStores(repo) + + cpID := id.MustCheckpointID("a1b2c3d4e5f6") + writeV1Checkpoint(t, v1Store, cpID, "session-001", + []byte("{\"type\":\"assistant\",\"message\":\"hello\"}\n"), + []string{"test prompt"}, + ) + + var stdout bytes.Buffer + + result, err := migrateCheckpointsV2(context.Background(), repo, v1Store, v2Store, &stdout) + require.NoError(t, err) + assert.Equal(t, 1, result.migrated) + assert.Equal(t, 0, result.skipped) + assert.Equal(t, 0, result.failed) + + // Verify checkpoint exists in v2 + summary, err := v2Store.ReadCommitted(context.Background(), cpID) + require.NoError(t, err) + require.NotNil(t, summary, "checkpoint should exist in v2 after migration") + assert.Equal(t, cpID, summary.CheckpointID) +} + +func TestMigrateCheckpointsV2_Idempotent(t *testing.T) { + t.Parallel() + repo := initMigrateTestRepo(t) + v1Store, v2Store := newMigrateStores(repo) + + cpID := id.MustCheckpointID("c3d4e5f6a1b2") + writeV1Checkpoint(t, v1Store, cpID, "session-idem", + []byte("{\"type\":\"assistant\",\"message\":\"idempotent test\"}\n"), + []string{"idem prompt"}, + ) + + var stdout bytes.Buffer + + // First run: should migrate + result1, err := migrateCheckpointsV2(context.Background(), repo, v1Store, v2Store, &stdout) + require.NoError(t, err) + assert.Equal(t, 1, result1.migrated) + assert.Equal(t, 0, result1.skipped) + + // Second run: should skip (no agent type means backfill also can't produce compact transcript) + stdout.Reset() + result2, err := migrateCheckpointsV2(context.Background(), repo, v1Store, v2Store, &stdout) + require.NoError(t, err) + assert.Equal(t, 0, result2.migrated) + assert.Equal(t, 1, result2.skipped) +} + +func TestMigrateCheckpointsV2_MultiSession(t *testing.T) { + t.Parallel() + repo := initMigrateTestRepo(t) + v1Store, v2Store := newMigrateStores(repo) + + cpID := id.MustCheckpointID("d4e5f6a1b2c3") + + // Write first session + writeV1Checkpoint(t, v1Store, cpID, "session-multi-1", + []byte("{\"type\":\"assistant\",\"message\":\"session 1\"}\n"), + []string{"prompt 1"}, + ) + + // Write second session to same checkpoint + writeV1Checkpoint(t, v1Store, cpID, "session-multi-2", + []byte("{\"type\":\"assistant\",\"message\":\"session 2\"}\n"), + []string{"prompt 2"}, + ) + + var stdout bytes.Buffer + + result, err := migrateCheckpointsV2(context.Background(), repo, v1Store, v2Store, &stdout) + require.NoError(t, err) + assert.Equal(t, 1, result.migrated) + + // Verify both sessions are in v2 + summary, readErr := v2Store.ReadCommitted(context.Background(), cpID) + require.NoError(t, readErr) + require.NotNil(t, summary) + assert.GreaterOrEqual(t, len(summary.Sessions), 2, "should have at least 2 sessions") +} + +func TestMigrateCheckpointsV2_NoV1Branch(t *testing.T) { + t.Parallel() + repo := initMigrateTestRepo(t) + v1Store, v2Store := newMigrateStores(repo) + var stdout bytes.Buffer + + // No v1 data written — ListCommitted returns empty + result, err := migrateCheckpointsV2(context.Background(), repo, v1Store, v2Store, &stdout) + require.NoError(t, err) + assert.Equal(t, 0, result.migrated) + assert.Contains(t, stdout.String(), "Nothing to migrate") +} + +func TestMigrateCmd_InvalidFlag(t *testing.T) { + t.Parallel() + cmd := newMigrateCmd() + cmd.SetArgs([]string{"--checkpoints", "v3"}) + + err := cmd.Execute() + require.Error(t, err) + assert.Contains(t, err.Error(), "unsupported checkpoints version") +} + +func TestMigrateCheckpointsV2_CompactionSkipped(t *testing.T) { + t.Parallel() + repo := initMigrateTestRepo(t) + v1Store, v2Store := newMigrateStores(repo) + + cpID := id.MustCheckpointID("e5f6a1b2c3d4") + // Write checkpoint with no agent type — compaction will be skipped + err := v1Store.WriteCommitted(context.Background(), checkpoint.WriteCommittedOptions{ + CheckpointID: cpID, + SessionID: "session-noagent", + Strategy: "manual-commit", + Transcript: []byte("{\"type\":\"assistant\",\"message\":\"no agent\"}\n"), + Prompts: []string{"compact fail prompt"}, + AuthorName: "Test", + AuthorEmail: "test@test.com", + }) + require.NoError(t, err) + + var stdout bytes.Buffer + + result, migrateErr := migrateCheckpointsV2(context.Background(), repo, v1Store, v2Store, &stdout) + require.NoError(t, migrateErr) + assert.Equal(t, 1, result.migrated) + assert.Contains(t, stdout.String(), "compact transcript not generated") +} + +func TestMigrateCheckpointsV2_TaskCheckpoint(t *testing.T) { + t.Parallel() + repo := initMigrateTestRepo(t) + v1Store, v2Store := newMigrateStores(repo) + + cpID := id.MustCheckpointID("b2c3d4e5f6a1") + err := v1Store.WriteCommitted(context.Background(), checkpoint.WriteCommittedOptions{ + CheckpointID: cpID, + SessionID: "session-task-001", + Strategy: "manual-commit", + Transcript: []byte("{\"type\":\"assistant\",\"message\":\"task work\"}\n"), + Prompts: []string{"task prompt"}, + IsTask: true, + ToolUseID: "toolu_01ABC", + AuthorName: "Test", + AuthorEmail: "test@test.com", + }) + require.NoError(t, err) + + var stdout bytes.Buffer + + result, migrateErr := migrateCheckpointsV2(context.Background(), repo, v1Store, v2Store, &stdout) + require.NoError(t, migrateErr) + assert.Equal(t, 1, result.migrated) + + // Verify task checkpoint exists in v2 + summary, readErr := v2Store.ReadCommitted(context.Background(), cpID) + require.NoError(t, readErr) + require.NotNil(t, summary) + + // Verify task metadata tree was copied into v2 /full/current. + _, rootTreeHash, refErr := v2Store.GetRefState(plumbing.ReferenceName(paths.V2FullCurrentRefName)) + require.NoError(t, refErr) + rootTree, treeErr := repo.TreeObject(rootTreeHash) + require.NoError(t, treeErr) + _, taskFileErr := rootTree.File(cpID.Path() + "/0/tasks/toolu_01ABC/checkpoint.json") + require.NoError(t, taskFileErr, "expected migrated task checkpoint metadata in /full/current") +} + +func TestMigrateCheckpointsV2_AllSkippedOnRerun(t *testing.T) { + t.Parallel() + repo := initMigrateTestRepo(t) + v1Store, v2Store := newMigrateStores(repo) + + cpID1 := id.MustCheckpointID("f6a1b2c3d4e5") + cpID2 := id.MustCheckpointID("a1b2c3d4e5f7") + + writeV1Checkpoint(t, v1Store, cpID1, "session-p1", + []byte("{\"type\":\"assistant\",\"message\":\"first\"}\n"), + []string{"prompt 1"}, + ) + writeV1Checkpoint(t, v1Store, cpID2, "session-p2", + []byte("{\"type\":\"assistant\",\"message\":\"second\"}\n"), + []string{"prompt 2"}, + ) + + // First run: migrates both + var discard bytes.Buffer + result1, err := migrateCheckpointsV2(context.Background(), repo, v1Store, v2Store, &discard) + require.NoError(t, err) + assert.Equal(t, 2, result1.migrated) + + // Second run: skips both + var stdout bytes.Buffer + result2, err := migrateCheckpointsV2(context.Background(), repo, v1Store, v2Store, &stdout) + require.NoError(t, err) + assert.Equal(t, 0, result2.migrated) + assert.Equal(t, 2, result2.skipped) +} + +func TestMigrateCheckpointsV2_BackfillCompactTranscript(t *testing.T) { + t.Parallel() + repo := initMigrateTestRepo(t) + v1Store, v2Store := newMigrateStores(repo) + + cpID := id.MustCheckpointID("aabb11223344") + + // Write v1 checkpoint with agent type (so compaction can succeed) + err := v1Store.WriteCommitted(context.Background(), checkpoint.WriteCommittedOptions{ + CheckpointID: cpID, + SessionID: "session-backfill", + Strategy: "manual-commit", + Transcript: []byte("{\"type\":\"user\",\"message\":{\"role\":\"user\",\"content\":\"hello\"}}\n{\"type\":\"assistant\",\"message\":{\"role\":\"assistant\",\"content\":[{\"type\":\"text\",\"text\":\"hi\"}]}}\n"), + Prompts: []string{"hello"}, + Agent: "Claude Code", + AuthorName: "Test", + AuthorEmail: "test@test.com", + }) + require.NoError(t, err) + + // Write to v2 WITHOUT compact transcript (simulating earlier migration) + err = v2Store.WriteCommitted(context.Background(), checkpoint.WriteCommittedOptions{ + CheckpointID: cpID, + SessionID: "session-backfill", + Strategy: "manual-commit", + Transcript: []byte("{\"type\":\"user\",\"message\":{\"role\":\"user\",\"content\":\"hello\"}}\n"), + Prompts: []string{"hello"}, + Agent: "Claude Code", + AuthorName: "Test", + AuthorEmail: "test@test.com", + // CompactTranscript intentionally nil + }) + require.NoError(t, err) + + // Verify no transcript.jsonl on /main yet + summary, err := v2Store.ReadCommitted(context.Background(), cpID) + require.NoError(t, err) + require.NotNil(t, summary) + assert.Empty(t, summary.Sessions[0].Transcript, "should have no compact transcript before backfill") + + // Run migration — should backfill the compact transcript + var stdout bytes.Buffer + result, migrateErr := migrateCheckpointsV2(context.Background(), repo, v1Store, v2Store, &stdout) + require.NoError(t, migrateErr) + assert.Equal(t, 1, result.migrated, "backfill should count as migrated") + assert.Equal(t, 0, result.skipped) + assert.Contains(t, stdout.String(), "added transcript.jsonl") + + // Verify transcript.jsonl now exists + summary2, err := v2Store.ReadCommitted(context.Background(), cpID) + require.NoError(t, err) + require.NotNil(t, summary2) + assert.NotEmpty(t, summary2.Sessions[0].Transcript, "should have compact transcript after backfill") +} + +func TestMigrateCheckpointsV2_RepairsMissingFullTranscriptBeforeBackfill(t *testing.T) { + t.Parallel() + repo := initMigrateTestRepo(t) + v1Store, v2Store := newMigrateStores(repo) + + cpID := id.MustCheckpointID("112233aabbcc") + writeV1Checkpoint(t, v1Store, cpID, "session-repair-001", + []byte("{\"type\":\"assistant\",\"message\":\"repair me\"}\n"), + []string{"repair prompt"}, + ) + + // Initial migration to create v2 state. + var initialRun bytes.Buffer + result1, err := migrateCheckpointsV2(context.Background(), repo, v1Store, v2Store, &initialRun) + require.NoError(t, err) + assert.Equal(t, 1, result1.migrated) + + // Simulate interrupted migration by removing raw transcript files from /full/current. + removeV2SessionTranscriptFiles(t, repo, v2Store, cpID, 0) + + // Re-run migration: should repair /full/current and count as migrated (not skipped). + var rerun bytes.Buffer + result2, rerunErr := migrateCheckpointsV2(context.Background(), repo, v1Store, v2Store, &rerun) + require.NoError(t, rerunErr) + assert.Equal(t, 1, result2.migrated) + assert.Equal(t, 0, result2.failed) + assert.Contains(t, rerun.String(), "repaired partial v2 checkpoint state") + + content, readErr := v2Store.ReadSessionContent(context.Background(), cpID, 0) + require.NoError(t, readErr) + assert.NotEmpty(t, content.Transcript, "raw full transcript should be restored in /full/current") +} + +func TestMigrateCheckpointsV2_RepairsCurrentFullEvenWhenArchiveExists(t *testing.T) { + t.Parallel() + repo := initMigrateTestRepo(t) + v1Store, v2Store := newMigrateStores(repo) + + cpID := id.MustCheckpointID("334455ddeeff") + writeV1Checkpoint(t, v1Store, cpID, "session-repair-archive-001", + []byte("{\"type\":\"assistant\",\"message\":\"repair from archive fallback\"}\n"), + []string{"repair archive prompt"}, + ) + + // Initial migration to seed v2. + var initialRun bytes.Buffer + result1, err := migrateCheckpointsV2(context.Background(), repo, v1Store, v2Store, &initialRun) + require.NoError(t, err) + assert.Equal(t, 1, result1.migrated) + + // Preserve current generation as an archived ref to simulate fallback availability. + currentCommitHash, _, refErr := v2Store.GetRefState(plumbing.ReferenceName(paths.V2FullCurrentRefName)) + require.NoError(t, refErr) + archiveRef := plumbing.ReferenceName(paths.V2FullRefPrefix + "0000000000001") + require.NoError(t, repo.Storer.SetReference(plumbing.NewHashReference(archiveRef, currentCommitHash))) + + // Remove current /full/current transcript artifacts. + removeV2SessionTranscriptFiles(t, repo, v2Store, cpID, 0) + + // Sanity-check fallback exists: ReadSessionContent can still read from archive. + archivedRead, archivedReadErr := v2Store.ReadSessionContent(context.Background(), cpID, 0) + require.NoError(t, archivedReadErr) + assert.NotEmpty(t, archivedRead.Transcript) + + // Re-run migration: should still repair /full/current. + var rerun bytes.Buffer + result2, rerunErr := migrateCheckpointsV2(context.Background(), repo, v1Store, v2Store, &rerun) + require.NoError(t, rerunErr) + assert.Equal(t, 1, result2.migrated) + assert.Contains(t, rerun.String(), "repaired partial v2 checkpoint state") + + ok, checkErr := hasCurrentFullSessionArtifacts(repo, v2Store, cpID, 0) + require.NoError(t, checkErr) + assert.True(t, ok, "expected /full/current artifacts to be restored") +} + +func removeV2SessionTranscriptFiles(t *testing.T, repo *git.Repository, v2Store *checkpoint.V2GitStore, cpID id.CheckpointID, sessionIdx int) { + t.Helper() + + refName := plumbing.ReferenceName(paths.V2FullCurrentRefName) + parentHash, rootTreeHash, err := v2Store.GetRefState(refName) + require.NoError(t, err) + + newRootHash, updateErr := checkpoint.UpdateSubtree( + repo, + rootTreeHash, + []string{string(cpID[:2]), string(cpID[2:]), strconv.Itoa(sessionIdx)}, + nil, + checkpoint.UpdateSubtreeOptions{ + MergeMode: checkpoint.MergeKeepExisting, + DeleteNames: []string{ + paths.TranscriptFileName, + paths.TranscriptFileName + ".001", + paths.TranscriptFileName + ".002", + paths.ContentHashFileName, + }, + }, + ) + require.NoError(t, updateErr) + + commitHash, commitErr := checkpoint.CreateCommit(repo, newRootHash, parentHash, "test: remove full transcript\n", "Test", "test@test.com") + require.NoError(t, commitErr) + require.NoError(t, repo.Storer.SetReference(plumbing.NewHashReference(refName, commitHash))) +} + +func TestBuildMigrateWriteOpts_PromptSeparatorRoundTrip(t *testing.T) { + t.Parallel() + + cpID := id.MustCheckpointID("123456abcdef") + rawPrompts := strings.Join([]string{ + "first line\nwith newline", + "second prompt", + }, checkpoint.PromptSeparator) + + opts := buildMigrateWriteOpts(&checkpoint.SessionContent{ + Metadata: checkpoint.CommittedMetadata{ + SessionID: "session-prompts-001", + Strategy: "manual-commit", + }, + Prompts: rawPrompts, + }, checkpoint.CommittedInfo{ + CheckpointID: cpID, + }) + + require.Len(t, opts.Prompts, 2) + assert.Equal(t, "first line\nwith newline", opts.Prompts[0]) + assert.Equal(t, "second prompt", opts.Prompts[1]) +} + +func TestSpliceTasksTreeToV2_MergesTaskDirectories(t *testing.T) { + t.Parallel() + + repo := initMigrateTestRepo(t) + _, v2Store := newMigrateStores(repo) + cpID := id.MustCheckpointID("123abc456def") + + err := v2Store.WriteCommitted(context.Background(), checkpoint.WriteCommittedOptions{ + CheckpointID: cpID, + SessionID: "session-001", + Strategy: "manual-commit", + Agent: "Cursor", + Transcript: []byte(`{"type":"assistant","message":"seed"}`), + AuthorName: "Test", + AuthorEmail: "test@test.com", + }) + require.NoError(t, err) + + rootTasksHash := buildTasksTreeHash(t, repo, "toolu_root") + sessionTasksHash := buildTasksTreeHash(t, repo, "toolu_session") + + require.NoError(t, spliceTasksTreeToV2(repo, v2Store, cpID, 0, rootTasksHash)) + require.NoError(t, spliceTasksTreeToV2(repo, v2Store, cpID, 0, sessionTasksHash)) + + _, rootTreeHash, refErr := v2Store.GetRefState(plumbing.ReferenceName(paths.V2FullCurrentRefName)) + require.NoError(t, refErr) + rootTree, treeErr := repo.TreeObject(rootTreeHash) + require.NoError(t, treeErr) + + _, err = rootTree.File(cpID.Path() + "/0/tasks/toolu_root/checkpoint.json") + require.NoError(t, err, "root task metadata should be preserved") + _, err = rootTree.File(cpID.Path() + "/0/tasks/toolu_session/checkpoint.json") + require.NoError(t, err, "session task metadata should be preserved") +} diff --git a/cmd/entire/cli/root.go b/cmd/entire/cli/root.go index abdda9958..dda4903a4 100644 --- a/cmd/entire/cli/root.go +++ b/cmd/entire/cli/root.go @@ -100,6 +100,7 @@ func NewRootCmd() *cobra.Command { cmd.AddCommand(newSendAnalyticsCmd()) cmd.AddCommand(newAttachCmd()) cmd.AddCommand(newCurlBashPostInstallCmd()) + cmd.AddCommand(newMigrateCmd()) cmd.SetVersionTemplate(versionString())