githubnext · mrjf · Jun 3, 2026 · Jun 3, 2026 · Jun 3, 2026
diff --git a/.crane/scripts/score.go b/.crane/scripts/score.go
@@ -63,6 +63,9 @@ type CutoverGates struct {
 	FunctionalContracts     float64 `json:"functional_contracts"`
 	StateDiffContracts      float64 `json:"state_diff_contracts"`
 	PythonBehaviorContracts float64 `json:"python_behavior_contracts"`
+	GoldenFixtureCorpus     string  `json:"golden_fixture_corpus"`
+	AllGoGoldenTests        string  `json:"all_go_golden_tests"`
+	NoPythonRuntime         string  `json:"no_python_runtime_dependency"`
 	KnownExceptions         int     `json:"known_exceptions"`
 	GoTests                 string  `json:"go_tests"`
 	PythonTests             string  `json:"python_tests"`
@@ -99,6 +102,9 @@ type Score struct {
 	PythonTestsPassing     bool            `json:"python_tests_passing"`
 	GoTestsPassing         bool            `json:"go_tests_passing"`
 	BenchmarksPassing      bool            `json:"benchmarks_passing"`
+	GoldenFixtureCorpus    bool            `json:"golden_fixture_corpus"`
+	AllGoGoldenTests       bool            `json:"all_go_golden_tests"`
+	NoPythonRuntime        bool            `json:"no_python_runtime_dependency"`
 	ParityPassing          int             `json:"parity_passing"`
 	ParityTotal            int             `json:"parity_total"`
 	SourceTestsPassing     int             `json:"source_tests_passing"`
@@ -143,6 +149,9 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {
 	functional := RatioGate{}
 	stateDiff := RatioGate{}
 	behaviorContracts := RatioGate{}
+	goldenFixtureCorpus := BoolGate{}
+	allGoGoldenTests := BoolGate{}
+	noPythonRuntime := BoolGate{}
 
 	for scanner.Scan() {
 		line := scanner.Text()
@@ -151,7 +160,21 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {
 		}
 		if gate, ok := parseGateEvent(line); ok {
 			eventsSeen++
-			applyGateEvent(gate, &pythonReference, &surface, &help, &functional, &stateDiff, &behaviorContracts, &knownExceptions, &pythonTests, &benchmarks)
+			applyGateEvent(
+				gate,
+				&pythonReference,
+				&surface,
+				&help,
+				&functional,
+				&stateDiff,
+				&behaviorContracts,
+				&goldenFixtureCorpus,
+				&allGoGoldenTests,
+				&noPythonRuntime,
+				&knownExceptions,
+				&pythonTests,
+				&benchmarks,
+			)
 			continue
 		}
 
@@ -163,7 +186,21 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {
 
 		if ev.Output != "" {
 			if gate, ok := parseGateEvent(ev.Output); ok {
-				applyGateEvent(gate, &pythonReference, &surface, &help, &functional, &stateDiff, &behaviorContracts, &knownExceptions, &pythonTests, &benchmarks)
+				applyGateEvent(
+					gate,
+					&pythonReference,
+					&surface,
+					&help,
+					&functional,
+					&stateDiff,
+					&behaviorContracts,
+					&goldenFixtureCorpus,
+					&allGoGoldenTests,
+					&noPythonRuntime,
+					&knownExceptions,
+					&pythonTests,
+					&benchmarks,
+				)
 			}
 			if n, ok := approvedExceptionCount(ev.Output); ok && n > knownExceptions {
 				knownExceptions = n
@@ -253,6 +290,9 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {
 		FunctionalContracts:     functional.Percent(),
 		StateDiffContracts:      stateDiff.Percent(),
 		PythonBehaviorContracts: behaviorContracts.Percent(),
+		GoldenFixtureCorpus:     passFail(goldenFixtureCorpus.OK()),
+		AllGoGoldenTests:        passFail(allGoGoldenTests.OK()),
+		NoPythonRuntime:         passFail(noPythonRuntime.OK()),
 		KnownExceptions:         knownExceptions,
 		GoTests:                 passFail(goTestsPass),
 		PythonTests:             passFail(pythonTests.OK()),
@@ -275,6 +315,9 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {
 		gates.FunctionalContracts == 1.0 &&
 		gates.StateDiffContracts == 1.0 &&
 		gates.PythonBehaviorContracts == 1.0 &&
+		gates.GoldenFixtureCorpus == "pass" &&
+		gates.AllGoGoldenTests == "pass" &&
+		gates.NoPythonRuntime == "pass" &&
 		gates.KnownExceptions == 0 &&
 		gates.GoTests == "pass" &&
 		gates.PythonTests == "pass" &&
@@ -315,6 +358,9 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {
 		PythonTestsPassing:     gates.PythonTests == "pass",
 		GoTestsPassing:         gates.GoTests == "pass",
 		BenchmarksPassing:      gates.Benchmarks == "pass",
+		GoldenFixtureCorpus:    gates.GoldenFixtureCorpus == "pass",
+		AllGoGoldenTests:       gates.AllGoGoldenTests == "pass",
+		NoPythonRuntime:        gates.NoPythonRuntime == "pass",
 		ParityPassing:          metrics.ParityPassing,
 		ParityTotal:            metrics.ParityTotal,
 		SourceTestsPassing:     metrics.SourceTestsPassing,
@@ -344,6 +390,9 @@ func applyGateEvent(
 	functional *RatioGate,
 	stateDiff *RatioGate,
 	behaviorContracts *RatioGate,
+	goldenFixtureCorpus *BoolGate,
+	allGoGoldenTests *BoolGate,
+	noPythonRuntime *BoolGate,
 	knownExceptions *int,
 	pythonTests *BoolGate,
 	benchmarks *BoolGate,
@@ -361,6 +410,12 @@ func applyGateEvent(
 		*stateDiff = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total}
 	case "python_behavior_contracts":
 		*behaviorContracts = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total}
+	case "golden_fixture_corpus":
+		*goldenFixtureCorpus = BoolGate{Seen: true, Passed: gate.Passed}
+	case "all_go_golden_tests":
+		*allGoGoldenTests = BoolGate{Seen: true, Passed: gate.Passed}
+	case "no_python_runtime_dependency":
+		*noPythonRuntime = BoolGate{Seen: true, Passed: gate.Passed}
 	case "known_exceptions":
 		*knownExceptions = gate.Count
 	case "python_tests":
@@ -435,6 +490,9 @@ func gateResults(gates CutoverGates) []GateResult {
 		{Name: "functional_contracts", Passing: gates.FunctionalContracts == 1.0},
 		{Name: "state_diff_contracts", Passing: gates.StateDiffContracts == 1.0},
 		{Name: "python_behavior_contracts", Passing: gates.PythonBehaviorContracts == 1.0},
+		{Name: "golden_fixture_corpus", Passing: gates.GoldenFixtureCorpus == "pass"},
+		{Name: "all_go_golden_tests", Passing: gates.AllGoGoldenTests == "pass"},
+		{Name: "no_python_runtime_dependency", Passing: gates.NoPythonRuntime == "pass"},
 		{Name: "python_tests_pass", Passing: gates.PythonTests == "pass"},
 		{Name: "benchmarks_pass", Passing: gates.Benchmarks == "pass"},
 		{Name: "no_known_exceptions", Passing: gates.KnownExceptions == 0},

diff --git a/cmd/apm/CUTOVER.md b/cmd/apm/CUTOVER.md
@@ -13,7 +13,12 @@ via PyInstaller packaging and `pip install apm-cli`.
 The Go CLI currently implements:
 - `apm --help` / `apm --version` (full parity with Python)
 - `apm init [--yes] [PROJECT_NAME]` (functional, creates apm.yml)
-- Per-command `--help` for all 26 commands (golden-file verified)
+- Per-command `--help` for all 26 commands (initial golden-file coverage)
+
+The checked-in `cmd/apm/testdata/golden/` files are the start of the
+cutover corpus, not final completion proof. Final completion requires the
+full command matrix below to be represented as committed fixtures and replayed
+by Go without invoking the Python runtime.
 
 Remaining commands return a "not yet fully implemented" message.
 
@@ -28,8 +33,18 @@ are true:
    `policy`, `mcp`, `runtime`, `targets`, `list`, `view`, `cache`,
    `deps`, `marketplace`, `uninstall`, `prune`
 3. Python-vs-Go parity tests pass for all commands in the matrix
-4. `go build ./cmd/apm` produces a single static binary
-5. CI passes on the crane PR branch (`crane/crane-migration-python-to-go-full-apm-cli-rewrite`)
+4. The final Python-reference parity run has been frozen into a committed,
+   versioned golden fixture corpus. The corpus must include CLI inventory,
+   help and usage output, error output, exit codes, generated files, lockfiles,
+   config files, managed-file manifests, deterministic cache/config layout, and
+   audit artifacts for the full command matrix.
+5. An all-Go golden replay passes against that corpus with no live Python
+   oracle. The replay must build `cmd/apm` and compare only the Go binary
+   against checked-in fixtures.
+6. A no-Python-runtime check passes: `APM_PYTHON_BIN` is unset, the Python CLI
+   is hidden or unavailable to the replay, and the golden replay still passes.
+7. `go build ./cmd/apm` produces a single static binary
+8. CI passes on the crane PR branch (`crane/crane-migration-python-to-go-full-apm-cli-rewrite`)
 
 ## Cutover Steps
 
@@ -46,13 +61,14 @@ When conditions are met:
 
 ## Python Compatibility Shim
 
-Until all commands are implemented in Go, the Python CLI remains the
-authoritative `apm` command. The Go binary is available as `apm-go`
-for testing.
+Until all commands are implemented in Go and the golden replay gate passes, the
+Python CLI remains the authoritative `apm` command. The Go binary is available
+as `apm-go` for testing.
 
-The shim removal plan: once the command matrix passes functional tests,
-the Python entrypoint is replaced by the Go binary in the same PR that
-passes the final parity tests.
+The shim removal plan: once the command matrix passes functional tests, the
+final Python-reference behavior is frozen into golden fixtures. Only after the
+all-Go replay passes without a Python runtime can the Python entrypoint be
+replaced by the Go binary.
 
 ## Timeline
 

diff --git a/cmd/apm/cli_parity_test.go b/cmd/apm/cli_parity_test.go
@@ -102,8 +102,8 @@ func runPython(args ...string) (stdout, stderr string, exitCode int) {
 }
 
 // noPython returns true when the Python CLI is not available.
-// Tests that require Python use this to return a vacuous pass rather than skip,
-// so they do not reduce the correctness gate score.
+// Optional progress tests call it to skip the Python comparison when no Python
+// CLI is available; final completion is enforced by the explicit scorer gates instead.
 func noPython() bool {
 	return pythonBin() == ""
 }
@@ -248,10 +248,10 @@ func TestParityCLISelfUpdateAlias(t *testing.T) {
 // --- Python-vs-Go parity tests (require APM_PYTHON_BIN) ---
 
 // TestPythonVsGoVersionExitCode compares exit codes for --version.
-// When APM_PYTHON_BIN is not set the test passes vacuously (no Python to compare).
+// When APM_PYTHON_BIN is not set the comparison is skipped; that pass is not completion evidence.
 func TestPythonVsGoVersionExitCode(t *testing.T) {
 	if noPython() {
-		t.Log("APM_PYTHON_BIN not set; skipping Python-vs-Go comparison (vacuous pass)")
+		t.Log("APM_PYTHON_BIN not set; skipping optional Python-vs-Go comparison")
 		return
 	}
 	_, _, pyCode := runPython("--version")
@@ -264,7 +264,7 @@ func TestPythonVsGoVersionExitCode(t *testing.T) {
 // TestParityPythonVsGoHelpExitCode compares --help exit codes.
 func TestPythonVsGoHelpExitCode(t *testing.T) {
 	if noPython() {
-		t.Log("APM_PYTHON_BIN not set; skipping Python-vs-Go comparison (vacuous pass)")
+		t.Log("APM_PYTHON_BIN not set; skipping optional Python-vs-Go comparison")
 		return
 	}
 	_, _, pyCode := runPython("--help")
@@ -277,7 +277,7 @@ func TestPythonVsGoHelpExitCode(t *testing.T) {
 // TestParityPythonVsGoUnknownCommandExitCode verifies both fail on unknown cmd.
 func TestPythonVsGoUnknownCommandExitCode(t *testing.T) {
 	if noPython() {
-		t.Log("APM_PYTHON_BIN not set; skipping Python-vs-Go comparison (vacuous pass)")
+		t.Log("APM_PYTHON_BIN not set; skipping optional Python-vs-Go comparison")
 		return
 	}
 	_, _, pyCode := runPython("totally-unknown-xyz")
@@ -290,7 +290,7 @@ func TestPythonVsGoUnknownCommandExitCode(t *testing.T) {
 // TestParityPythonVsGoHelpCommandList verifies Go help lists all Python commands.
 func TestPythonVsGoHelpCommandList(t *testing.T) {
 	if noPython() {
-		t.Log("APM_PYTHON_BIN not set; skipping Python-vs-Go comparison (vacuous pass)")
+		t.Log("APM_PYTHON_BIN not set; skipping optional Python-vs-Go comparison")
 		return
 	}
 	pyOut, _, _ := runPython("--help")
@@ -324,7 +324,7 @@ func TestPythonVsGoHelpCommandList(t *testing.T) {
 // TestParityPythonVsGoSubcommandHelpExitCodes compares <cmd> --help exit codes.
 func TestPythonVsGoSubcommandHelpExitCodes(t *testing.T) {
 	if noPython() {
-		t.Log("APM_PYTHON_BIN not set; skipping Python-vs-Go comparison (vacuous pass)")
+		t.Log("APM_PYTHON_BIN not set; skipping optional Python-vs-Go comparison")
 		return
 	}
 	cmds := []string{
@@ -359,17 +359,20 @@ func goldenDir(t *testing.T) string {
 }
 
 // readGolden reads a golden file and returns its contents.
-// Returns "" if the file does not exist (test passes vacuously).
+// Golden fixtures are cutover evidence; a missing or empty fixture must fail
+// the test instead of passing without evidence.
 func readGolden(t *testing.T, name string) string {
 	t.Helper()
 	p := filepath.Join(goldenDir(t), name)
 	b, err := os.ReadFile(p)
 	if err != nil {
-		// Golden file absent: vacuous pass (framework not yet set up).
-		t.Logf("golden file %s not found; skipping comparison", name)
-		return ""
+		t.Fatalf("golden fixture %s is required but was not found: %v", name, err)
 	}
-	return string(b)
+	content := string(b)
+	if strings.TrimSpace(content) == "" {
+		t.Fatalf("golden fixture %s is empty", name)
+	}
+	return content
 }
 
 // normalizeHelpOutput removes lines that vary between runs or versions:

diff --git a/tests/parity/python_contract_coverage.yml b/tests/parity/python_contract_coverage.yml
@@ -20203,23 +20203,29 @@ python_tests:
   - tests/unit/test_copilot_runtime.py::TestCopilotRuntime::test_str_representation
   - tests/unit/test_copilot_runtime.py::TestMcpConfigUtf8RoundTrip::test_get_mcp_servers_reads_non_ascii
   - tests/unit/test_crane_scheduler.py::test_completed_state_skips_inactive_migration
+  - tests/unit/test_crane_scheduler.py::test_issue_label_detection_accepts_github_label_payloads
   - tests/unit/test_crane_scheduler.py::test_active_issue_overrides_stale_completed_state
   - tests/unit/test_crane_scheduler.py::test_active_issue_does_not_override_pause
   - tests/unit/test_crane_scheduler.py::test_machine_state_completed_string_is_recognized
+  - tests/unit/test_crane_scheduler.py::test_pr_head_gate_fails_when_any_check_is_not_success
+  - tests/unit/test_crane_scheduler.py::test_pr_head_gate_passes_only_when_all_checks_succeed
   - tests/unit/test_crane_score.py::test_crane_score_counts_parity_events
   - tests/unit/test_crane_score.py::test_crane_score_applies_target_correctness_gate
   - tests/unit/test_crane_score.py::test_crane_score_can_reach_one_with_all_deletion_grade_gates
   - tests/unit/test_crane_score.py::test_crane_score_full_parity_but_bad_deletion_gate_cannot_reach_one
   - tests/unit/test_crane_score.py::test_crane_score_full_parity_but_missing_deletion_gates_cannot_reach_one
+  - tests/unit/test_crane_score.py::test_crane_score_full_parity_without_golden_cutover_gates_cannot_reach_one
   - tests/unit/test_crane_score.py::test_crane_score_package_level_go_failure_blocks_one
   - tests/unit/test_crane_score.py::test_crane_score_rejects_empty_event_stream
   - tests/unit/test_crane_score.py::test_crane_score_reaches_one_with_completion_tests_and_explicit_behavior_gate
   - tests/unit/test_crane_score.py::test_crane_score_does_not_infer_behavior_contracts_from_test_name
   - tests/unit/test_crane_score.py::test_crane_score_blocks_incomplete_behavior_contract_gate
   - tests/unit/test_crane_score.py::test_crane_score_blocks_known_exceptions
   - tests/unit/test_crane_workflow_prompt.py::test_crane_acceptance_requires_shared_iteration_summary_for_pr_updates
+  - tests/unit/test_crane_workflow_prompt.py::test_crane_completion_is_two_phase_and_pr_head_gated
   - tests/unit/test_crane_workflow_prompt.py::test_crane_commit_guidance_provides_structured_summary_fallback
   - tests/unit/test_crane_workflow_prompt.py::test_crane_prompt_blocks_stale_completed_state_from_finishing
+  - tests/unit/test_crane_workflow_prompt.py::test_crane_state_template_tracks_completion_candidate_gate
   - tests/unit/test_cursor_mcp.py::TestCursorClientFactory::test_create_cursor_client
   - tests/unit/test_cursor_mcp.py::TestCursorClientFactory::test_create_cursor_client_case_insensitive
   - tests/unit/test_cursor_mcp.py::TestCursorClientAdapter::test_config_path_is_repo_local

diff --git a/tests/unit/test_crane_score.py b/tests/unit/test_crane_score.py
@@ -74,6 +74,9 @@ def _deletion_gates() -> list[str]:
         '{"crane":"gate","name":"functional","passing":1,"total":1}',
         '{"crane":"gate","name":"state_diff","passing":1,"total":1}',
         '{"crane":"gate","name":"python_behavior_contracts","passing":1,"total":1}',
+        '{"crane":"gate","name":"golden_fixture_corpus","passed":true}',
+        '{"crane":"gate","name":"all_go_golden_tests","passed":true}',
+        '{"crane":"gate","name":"no_python_runtime_dependency","passed":true}',
         '{"crane":"gate","name":"known_exceptions","count":0}',
         '{"crane":"gate","name":"python_tests","passed":true}',
         '{"crane":"gate","name":"benchmarks","passed":true}',
@@ -166,6 +169,9 @@ def test_crane_score_can_reach_one_with_all_deletion_grade_gates() -> None:
         "state_diff_contracts": 1.0,
         "python_behavior_contracts": 1.0,
         "known_exceptions": 0,
+        "golden_fixture_corpus": "pass",
+        "all_go_golden_tests": "pass",
+        "no_python_runtime_dependency": "pass",
         "go_tests": "pass",
         "python_tests": "pass",
         "benchmarks": "pass",
@@ -181,6 +187,9 @@ def test_crane_score_can_reach_one_with_all_deletion_grade_gates() -> None:
         '{"crane":"gate","name":"functional","passing":0,"total":1}',
         '{"crane":"gate","name":"state_diff","passing":0,"total":1}',
         '{"crane":"gate","name":"python_behavior_contracts","passing":0,"total":1}',
+        '{"crane":"gate","name":"golden_fixture_corpus","passed":false}',
+        '{"crane":"gate","name":"all_go_golden_tests","passed":false}',
+        '{"crane":"gate","name":"no_python_runtime_dependency","passed":false}',
         '{"crane":"gate","name":"known_exceptions","count":1}',
         '{"crane":"gate","name":"python_tests","passed":false}',
         '{"crane":"gate","name":"benchmarks","passed":false}',
@@ -205,6 +214,27 @@ def test_crane_score_full_parity_but_missing_deletion_gates_cannot_reach_one() -
     assert score["deletion_grade_ready"] is False
 
 
+def test_crane_score_full_parity_without_golden_cutover_gates_cannot_reach_one() -> None:
+    omitted_gates = {
+        "golden_fixture_corpus",
+        "all_go_golden_tests",
+        "no_python_runtime_dependency",
+    }
+    gates = [line for line in _deletion_gates() if json.loads(line)["name"] not in omitted_gates]
+
+    score = _run_score([*_parity_passes(302), _package_pass(), *gates])
+    gates_by_name = _gates(score)
+
+    assert score["migration_score"] < 1.0
+    assert score["deletion_grade_ready"] is False
+    assert score["golden_fixture_corpus"] is False
+    assert score["all_go_golden_tests"] is False
+    assert score["no_python_runtime_dependency"] is False
+    assert gates_by_name["golden_fixture_corpus"]["passing"] is False
+    assert gates_by_name["all_go_golden_tests"]["passing"] is False
+    assert gates_by_name["no_python_runtime_dependency"]["passing"] is False
+
+
 def test_crane_score_package_level_go_failure_blocks_one() -> None:
     score = _run_score([*_parity_passes(302), _package_fail(), *_deletion_gates()])
 
@@ -242,6 +272,9 @@ def test_crane_score_reaches_one_with_completion_tests_and_explicit_behavior_gat
             *_parity_passes(293),
             *_completion_gate_events(),
             _behavior_contract_gate_output(1, 1),
+            '{"crane":"gate","name":"golden_fixture_corpus","passed":true}',
+            '{"crane":"gate","name":"all_go_golden_tests","passed":true}',
+            '{"crane":"gate","name":"no_python_runtime_dependency","passed":true}',
             _package_pass(),
         ]
     )