From e14a02833619c4893b9b177bcedd414ddfae4a41 Mon Sep 17 00:00:00 2001 From: mrjf Date: Wed, 3 Jun 2026 15:14:19 -0700 Subject: [PATCH 1/3] Require golden fixture cutover gates --- .crane/scripts/score.go | 62 ++++++++++++++++++++++++++++++++-- cmd/apm/CUTOVER.md | 34 ++++++++++++++----- cmd/apm/cli_parity_test.go | 29 +++++++++------- tests/unit/test_crane_score.py | 37 ++++++++++++++++++++ 4 files changed, 138 insertions(+), 24 deletions(-) diff --git a/.crane/scripts/score.go b/.crane/scripts/score.go index da3d1a4e..f900ca02 100644 --- a/.crane/scripts/score.go +++ b/.crane/scripts/score.go @@ -63,6 +63,9 @@ type CutoverGates struct { FunctionalContracts float64 `json:"functional_contracts"` StateDiffContracts float64 `json:"state_diff_contracts"` PythonBehaviorContracts float64 `json:"python_behavior_contracts"` + GoldenFixtureCorpus string `json:"golden_fixture_corpus"` + AllGoGoldenTests string `json:"all_go_golden_tests"` + NoPythonRuntime string `json:"no_python_runtime_dependency"` KnownExceptions int `json:"known_exceptions"` GoTests string `json:"go_tests"` PythonTests string `json:"python_tests"` @@ -99,6 +102,9 @@ type Score struct { PythonTestsPassing bool `json:"python_tests_passing"` GoTestsPassing bool `json:"go_tests_passing"` BenchmarksPassing bool `json:"benchmarks_passing"` + GoldenFixtureCorpus bool `json:"golden_fixture_corpus"` + AllGoGoldenTests bool `json:"all_go_golden_tests"` + NoPythonRuntime bool `json:"no_python_runtime_dependency"` ParityPassing int `json:"parity_passing"` ParityTotal int `json:"parity_total"` SourceTestsPassing int `json:"source_tests_passing"` @@ -143,6 +149,9 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) { functional := RatioGate{} stateDiff := RatioGate{} behaviorContracts := RatioGate{} + goldenFixtureCorpus := BoolGate{} + allGoGoldenTests := BoolGate{} + noPythonRuntime := BoolGate{} for scanner.Scan() { line := scanner.Text() @@ -151,7 +160,21 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) { } if gate, ok := parseGateEvent(line); ok { eventsSeen++ - applyGateEvent(gate, &pythonReference, &surface, &help, &functional, &stateDiff, &behaviorContracts, &knownExceptions, &pythonTests, &benchmarks) + applyGateEvent( + gate, + &pythonReference, + &surface, + &help, + &functional, + &stateDiff, + &behaviorContracts, + &goldenFixtureCorpus, + &allGoGoldenTests, + &noPythonRuntime, + &knownExceptions, + &pythonTests, + &benchmarks, + ) continue } @@ -163,7 +186,21 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) { if ev.Output != "" { if gate, ok := parseGateEvent(ev.Output); ok { - applyGateEvent(gate, &pythonReference, &surface, &help, &functional, &stateDiff, &behaviorContracts, &knownExceptions, &pythonTests, &benchmarks) + applyGateEvent( + gate, + &pythonReference, + &surface, + &help, + &functional, + &stateDiff, + &behaviorContracts, + &goldenFixtureCorpus, + &allGoGoldenTests, + &noPythonRuntime, + &knownExceptions, + &pythonTests, + &benchmarks, + ) } if n, ok := approvedExceptionCount(ev.Output); ok && n > knownExceptions { knownExceptions = n @@ -253,6 +290,9 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) { FunctionalContracts: functional.Percent(), StateDiffContracts: stateDiff.Percent(), PythonBehaviorContracts: behaviorContracts.Percent(), + GoldenFixtureCorpus: passFail(goldenFixtureCorpus.OK()), + AllGoGoldenTests: passFail(allGoGoldenTests.OK()), + NoPythonRuntime: passFail(noPythonRuntime.OK()), KnownExceptions: knownExceptions, GoTests: passFail(goTestsPass), PythonTests: passFail(pythonTests.OK()), @@ -275,6 +315,9 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) { gates.FunctionalContracts == 1.0 && gates.StateDiffContracts == 1.0 && gates.PythonBehaviorContracts == 1.0 && + gates.GoldenFixtureCorpus == "pass" && + gates.AllGoGoldenTests == "pass" && + gates.NoPythonRuntime == "pass" && gates.KnownExceptions == 0 && gates.GoTests == "pass" && gates.PythonTests == "pass" && @@ -315,6 +358,9 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) { PythonTestsPassing: gates.PythonTests == "pass", GoTestsPassing: gates.GoTests == "pass", BenchmarksPassing: gates.Benchmarks == "pass", + GoldenFixtureCorpus: gates.GoldenFixtureCorpus == "pass", + AllGoGoldenTests: gates.AllGoGoldenTests == "pass", + NoPythonRuntime: gates.NoPythonRuntime == "pass", ParityPassing: metrics.ParityPassing, ParityTotal: metrics.ParityTotal, SourceTestsPassing: metrics.SourceTestsPassing, @@ -344,6 +390,9 @@ func applyGateEvent( functional *RatioGate, stateDiff *RatioGate, behaviorContracts *RatioGate, + goldenFixtureCorpus *BoolGate, + allGoGoldenTests *BoolGate, + noPythonRuntime *BoolGate, knownExceptions *int, pythonTests *BoolGate, benchmarks *BoolGate, @@ -361,6 +410,12 @@ func applyGateEvent( *stateDiff = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total} case "python_behavior_contracts": *behaviorContracts = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total} + case "golden_fixture_corpus": + *goldenFixtureCorpus = BoolGate{Seen: true, Passed: gate.Passed} + case "all_go_golden_tests": + *allGoGoldenTests = BoolGate{Seen: true, Passed: gate.Passed} + case "no_python_runtime_dependency": + *noPythonRuntime = BoolGate{Seen: true, Passed: gate.Passed} case "known_exceptions": *knownExceptions = gate.Count case "python_tests": @@ -435,6 +490,9 @@ func gateResults(gates CutoverGates) []GateResult { {Name: "functional_contracts", Passing: gates.FunctionalContracts == 1.0}, {Name: "state_diff_contracts", Passing: gates.StateDiffContracts == 1.0}, {Name: "python_behavior_contracts", Passing: gates.PythonBehaviorContracts == 1.0}, + {Name: "golden_fixture_corpus", Passing: gates.GoldenFixtureCorpus == "pass"}, + {Name: "all_go_golden_tests", Passing: gates.AllGoGoldenTests == "pass"}, + {Name: "no_python_runtime_dependency", Passing: gates.NoPythonRuntime == "pass"}, {Name: "python_tests_pass", Passing: gates.PythonTests == "pass"}, {Name: "benchmarks_pass", Passing: gates.Benchmarks == "pass"}, {Name: "no_known_exceptions", Passing: gates.KnownExceptions == 0}, diff --git a/cmd/apm/CUTOVER.md b/cmd/apm/CUTOVER.md index 1b554aa1..292993ae 100644 --- a/cmd/apm/CUTOVER.md +++ b/cmd/apm/CUTOVER.md @@ -13,7 +13,12 @@ via PyInstaller packaging and `pip install apm-cli`. The Go CLI currently implements: - `apm --help` / `apm --version` (full parity with Python) - `apm init [--yes] [PROJECT_NAME]` (functional, creates apm.yml) -- Per-command `--help` for all 26 commands (golden-file verified) +- Per-command `--help` for all 26 commands (initial golden-file coverage) + +The checked-in `cmd/apm/testdata/golden/` files are the start of the +cutover corpus, not final completion proof. Final completion requires the +full command matrix below to be represented as committed fixtures and replayed +by Go without invoking the Python runtime. Remaining commands return a "not yet fully implemented" message. @@ -28,8 +33,18 @@ are true: `policy`, `mcp`, `runtime`, `targets`, `list`, `view`, `cache`, `deps`, `marketplace`, `uninstall`, `prune` 3. Python-vs-Go parity tests pass for all commands in the matrix -4. `go build ./cmd/apm` produces a single static binary -5. CI passes on the crane PR branch (`crane/crane-migration-python-to-go-full-apm-cli-rewrite`) +4. The final Python-reference parity run has been frozen into a committed, + versioned golden fixture corpus. The corpus must include CLI inventory, + help and usage output, error output, exit codes, generated files, lockfiles, + config files, managed-file manifests, deterministic cache/config layout, and + audit artifacts for the full command matrix. +5. An all-Go golden replay passes against that corpus with no live Python + oracle. The replay must build `cmd/apm` and compare only the Go binary + against checked-in fixtures. +6. A no-Python-runtime check passes: `APM_PYTHON_BIN` is unset, the Python CLI + is hidden or unavailable to the replay, and the golden replay still passes. +7. `go build ./cmd/apm` produces a single static binary +8. CI passes on the crane PR branch (`crane/crane-migration-python-to-go-full-apm-cli-rewrite`) ## Cutover Steps @@ -46,13 +61,14 @@ When conditions are met: ## Python Compatibility Shim -Until all commands are implemented in Go, the Python CLI remains the -authoritative `apm` command. The Go binary is available as `apm-go` -for testing. +Until all commands are implemented in Go and the golden replay gate passes, the +Python CLI remains the authoritative `apm` command. The Go binary is available +as `apm-go` for testing. -The shim removal plan: once the command matrix passes functional tests, -the Python entrypoint is replaced by the Go binary in the same PR that -passes the final parity tests. +The shim removal plan: once the command matrix passes functional tests, the +final Python-reference behavior is frozen into golden fixtures. Only after the +all-Go replay passes without a Python runtime can the Python entrypoint be +replaced by the Go binary. ## Timeline diff --git a/cmd/apm/cli_parity_test.go b/cmd/apm/cli_parity_test.go index 23b04000..3ca0e64c 100644 --- a/cmd/apm/cli_parity_test.go +++ b/cmd/apm/cli_parity_test.go @@ -102,8 +102,8 @@ func runPython(args ...string) (stdout, stderr string, exitCode int) { } // noPython returns true when the Python CLI is not available. -// Tests that require Python use this to return a vacuous pass rather than skip, -// so they do not reduce the correctness gate score. +// These optional progress tests skip Python comparison when it is unavailable; +// final completion is enforced by the explicit scorer gates instead. func noPython() bool { return pythonBin() == "" } @@ -248,10 +248,10 @@ func TestParityCLISelfUpdateAlias(t *testing.T) { // --- Python-vs-Go parity tests (require APM_PYTHON_BIN) --- // TestPythonVsGoVersionExitCode compares exit codes for --version. -// When APM_PYTHON_BIN is not set the test passes vacuously (no Python to compare). +// When APM_PYTHON_BIN is not set, this optional comparison is not completion evidence. func TestPythonVsGoVersionExitCode(t *testing.T) { if noPython() { - t.Log("APM_PYTHON_BIN not set; skipping Python-vs-Go comparison (vacuous pass)") + t.Log("APM_PYTHON_BIN not set; skipping optional Python-vs-Go comparison") return } _, _, pyCode := runPython("--version") @@ -264,7 +264,7 @@ func TestPythonVsGoVersionExitCode(t *testing.T) { // TestParityPythonVsGoHelpExitCode compares --help exit codes. func TestPythonVsGoHelpExitCode(t *testing.T) { if noPython() { - t.Log("APM_PYTHON_BIN not set; skipping Python-vs-Go comparison (vacuous pass)") + t.Log("APM_PYTHON_BIN not set; skipping optional Python-vs-Go comparison") return } _, _, pyCode := runPython("--help") @@ -277,7 +277,7 @@ func TestPythonVsGoHelpExitCode(t *testing.T) { // TestParityPythonVsGoUnknownCommandExitCode verifies both fail on unknown cmd. func TestPythonVsGoUnknownCommandExitCode(t *testing.T) { if noPython() { - t.Log("APM_PYTHON_BIN not set; skipping Python-vs-Go comparison (vacuous pass)") + t.Log("APM_PYTHON_BIN not set; skipping optional Python-vs-Go comparison") return } _, _, pyCode := runPython("totally-unknown-xyz") @@ -290,7 +290,7 @@ func TestPythonVsGoUnknownCommandExitCode(t *testing.T) { // TestParityPythonVsGoHelpCommandList verifies Go help lists all Python commands. func TestPythonVsGoHelpCommandList(t *testing.T) { if noPython() { - t.Log("APM_PYTHON_BIN not set; skipping Python-vs-Go comparison (vacuous pass)") + t.Log("APM_PYTHON_BIN not set; skipping optional Python-vs-Go comparison") return } pyOut, _, _ := runPython("--help") @@ -324,7 +324,7 @@ func TestPythonVsGoHelpCommandList(t *testing.T) { // TestParityPythonVsGoSubcommandHelpExitCodes compares --help exit codes. func TestPythonVsGoSubcommandHelpExitCodes(t *testing.T) { if noPython() { - t.Log("APM_PYTHON_BIN not set; skipping Python-vs-Go comparison (vacuous pass)") + t.Log("APM_PYTHON_BIN not set; skipping optional Python-vs-Go comparison") return } cmds := []string{ @@ -359,17 +359,20 @@ func goldenDir(t *testing.T) string { } // readGolden reads a golden file and returns its contents. -// Returns "" if the file does not exist (test passes vacuously). +// Golden fixtures are cutover evidence; missing fixtures must fail instead of +// passing without evidence. func readGolden(t *testing.T, name string) string { t.Helper() p := filepath.Join(goldenDir(t), name) b, err := os.ReadFile(p) if err != nil { - // Golden file absent: vacuous pass (framework not yet set up). - t.Logf("golden file %s not found; skipping comparison", name) - return "" + t.Fatalf("golden fixture %s is required but was not found: %v", name, err) } - return string(b) + content := string(b) + if strings.TrimSpace(content) == "" { + t.Fatalf("golden fixture %s is empty", name) + } + return content } // normalizeHelpOutput removes lines that vary between runs or versions: diff --git a/tests/unit/test_crane_score.py b/tests/unit/test_crane_score.py index 55f20713..f3cef940 100644 --- a/tests/unit/test_crane_score.py +++ b/tests/unit/test_crane_score.py @@ -74,6 +74,9 @@ def _deletion_gates() -> list[str]: '{"crane":"gate","name":"functional","passing":1,"total":1}', '{"crane":"gate","name":"state_diff","passing":1,"total":1}', '{"crane":"gate","name":"python_behavior_contracts","passing":1,"total":1}', + '{"crane":"gate","name":"golden_fixture_corpus","passed":true}', + '{"crane":"gate","name":"all_go_golden_tests","passed":true}', + '{"crane":"gate","name":"no_python_runtime_dependency","passed":true}', '{"crane":"gate","name":"known_exceptions","count":0}', '{"crane":"gate","name":"python_tests","passed":true}', '{"crane":"gate","name":"benchmarks","passed":true}', @@ -166,6 +169,9 @@ def test_crane_score_can_reach_one_with_all_deletion_grade_gates() -> None: "state_diff_contracts": 1.0, "python_behavior_contracts": 1.0, "known_exceptions": 0, + "golden_fixture_corpus": "pass", + "all_go_golden_tests": "pass", + "no_python_runtime_dependency": "pass", "go_tests": "pass", "python_tests": "pass", "benchmarks": "pass", @@ -181,6 +187,9 @@ def test_crane_score_can_reach_one_with_all_deletion_grade_gates() -> None: '{"crane":"gate","name":"functional","passing":0,"total":1}', '{"crane":"gate","name":"state_diff","passing":0,"total":1}', '{"crane":"gate","name":"python_behavior_contracts","passing":0,"total":1}', + '{"crane":"gate","name":"golden_fixture_corpus","passed":false}', + '{"crane":"gate","name":"all_go_golden_tests","passed":false}', + '{"crane":"gate","name":"no_python_runtime_dependency","passed":false}', '{"crane":"gate","name":"known_exceptions","count":1}', '{"crane":"gate","name":"python_tests","passed":false}', '{"crane":"gate","name":"benchmarks","passed":false}', @@ -205,6 +214,31 @@ def test_crane_score_full_parity_but_missing_deletion_gates_cannot_reach_one() - assert score["deletion_grade_ready"] is False +def test_crane_score_full_parity_without_golden_cutover_gates_cannot_reach_one() -> None: + omitted_gates = { + "golden_fixture_corpus", + "all_go_golden_tests", + "no_python_runtime_dependency", + } + gates = [ + line + for line in _deletion_gates() + if json.loads(line)["name"] not in omitted_gates + ] + + score = _run_score([*_parity_passes(302), _package_pass(), *gates]) + gates_by_name = _gates(score) + + assert score["migration_score"] < 1.0 + assert score["deletion_grade_ready"] is False + assert score["golden_fixture_corpus"] is False + assert score["all_go_golden_tests"] is False + assert score["no_python_runtime_dependency"] is False + assert gates_by_name["golden_fixture_corpus"]["passing"] is False + assert gates_by_name["all_go_golden_tests"]["passing"] is False + assert gates_by_name["no_python_runtime_dependency"]["passing"] is False + + def test_crane_score_package_level_go_failure_blocks_one() -> None: score = _run_score([*_parity_passes(302), _package_fail(), *_deletion_gates()]) @@ -242,6 +276,9 @@ def test_crane_score_reaches_one_with_completion_tests_and_explicit_behavior_gat *_parity_passes(293), *_completion_gate_events(), _behavior_contract_gate_output(1, 1), + '{"crane":"gate","name":"golden_fixture_corpus","passed":true}', + '{"crane":"gate","name":"all_go_golden_tests","passed":true}', + '{"crane":"gate","name":"no_python_runtime_dependency","passed":true}', _package_pass(), ] ) From 0fbef5da10d4bc3f243166c61686e1bbcefcaa32 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 3 Jun 2026 22:19:59 +0000 Subject: [PATCH 2/3] Fix test file formatting for CI Co-authored-by: mrjf <180956+mrjf@users.noreply.github.com> --- tests/unit/test_crane_score.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tests/unit/test_crane_score.py b/tests/unit/test_crane_score.py index f3cef940..e151a4f3 100644 --- a/tests/unit/test_crane_score.py +++ b/tests/unit/test_crane_score.py @@ -220,11 +220,7 @@ def test_crane_score_full_parity_without_golden_cutover_gates_cannot_reach_one() "all_go_golden_tests", "no_python_runtime_dependency", } - gates = [ - line - for line in _deletion_gates() - if json.loads(line)["name"] not in omitted_gates - ] + gates = [line for line in _deletion_gates() if json.loads(line)["name"] not in omitted_gates] score = _run_score([*_parity_passes(302), _package_pass(), *gates]) gates_by_name = _gates(score) From 190b4f23ede86de30041e4981227237b0bd8d633 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 3 Jun 2026 22:34:38 +0000 Subject: [PATCH 3/3] Fix parity coverage manifest for new Python tests Co-authored-by: mrjf <180956+mrjf@users.noreply.github.com> --- tests/parity/python_contract_coverage.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/parity/python_contract_coverage.yml b/tests/parity/python_contract_coverage.yml index b6568675..60fff492 100644 --- a/tests/parity/python_contract_coverage.yml +++ b/tests/parity/python_contract_coverage.yml @@ -20203,14 +20203,18 @@ python_tests: - tests/unit/test_copilot_runtime.py::TestCopilotRuntime::test_str_representation - tests/unit/test_copilot_runtime.py::TestMcpConfigUtf8RoundTrip::test_get_mcp_servers_reads_non_ascii - tests/unit/test_crane_scheduler.py::test_completed_state_skips_inactive_migration + - tests/unit/test_crane_scheduler.py::test_issue_label_detection_accepts_github_label_payloads - tests/unit/test_crane_scheduler.py::test_active_issue_overrides_stale_completed_state - tests/unit/test_crane_scheduler.py::test_active_issue_does_not_override_pause - tests/unit/test_crane_scheduler.py::test_machine_state_completed_string_is_recognized + - tests/unit/test_crane_scheduler.py::test_pr_head_gate_fails_when_any_check_is_not_success + - tests/unit/test_crane_scheduler.py::test_pr_head_gate_passes_only_when_all_checks_succeed - tests/unit/test_crane_score.py::test_crane_score_counts_parity_events - tests/unit/test_crane_score.py::test_crane_score_applies_target_correctness_gate - tests/unit/test_crane_score.py::test_crane_score_can_reach_one_with_all_deletion_grade_gates - tests/unit/test_crane_score.py::test_crane_score_full_parity_but_bad_deletion_gate_cannot_reach_one - tests/unit/test_crane_score.py::test_crane_score_full_parity_but_missing_deletion_gates_cannot_reach_one + - tests/unit/test_crane_score.py::test_crane_score_full_parity_without_golden_cutover_gates_cannot_reach_one - tests/unit/test_crane_score.py::test_crane_score_package_level_go_failure_blocks_one - tests/unit/test_crane_score.py::test_crane_score_rejects_empty_event_stream - tests/unit/test_crane_score.py::test_crane_score_reaches_one_with_completion_tests_and_explicit_behavior_gate @@ -20218,8 +20222,10 @@ python_tests: - tests/unit/test_crane_score.py::test_crane_score_blocks_incomplete_behavior_contract_gate - tests/unit/test_crane_score.py::test_crane_score_blocks_known_exceptions - tests/unit/test_crane_workflow_prompt.py::test_crane_acceptance_requires_shared_iteration_summary_for_pr_updates + - tests/unit/test_crane_workflow_prompt.py::test_crane_completion_is_two_phase_and_pr_head_gated - tests/unit/test_crane_workflow_prompt.py::test_crane_commit_guidance_provides_structured_summary_fallback - tests/unit/test_crane_workflow_prompt.py::test_crane_prompt_blocks_stale_completed_state_from_finishing + - tests/unit/test_crane_workflow_prompt.py::test_crane_state_template_tracks_completion_candidate_gate - tests/unit/test_cursor_mcp.py::TestCursorClientFactory::test_create_cursor_client - tests/unit/test_cursor_mcp.py::TestCursorClientFactory::test_create_cursor_client_case_insensitive - tests/unit/test_cursor_mcp.py::TestCursorClientAdapter::test_config_path_is_repo_local