Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 60 additions & 2 deletions .crane/scripts/score.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ type CutoverGates struct {
FunctionalContracts float64 `json:"functional_contracts"`
StateDiffContracts float64 `json:"state_diff_contracts"`
PythonBehaviorContracts float64 `json:"python_behavior_contracts"`
GoldenFixtureCorpus string `json:"golden_fixture_corpus"`
AllGoGoldenTests string `json:"all_go_golden_tests"`
NoPythonRuntime string `json:"no_python_runtime_dependency"`
KnownExceptions int `json:"known_exceptions"`
GoTests string `json:"go_tests"`
PythonTests string `json:"python_tests"`
Expand Down Expand Up @@ -99,6 +102,9 @@ type Score struct {
PythonTestsPassing bool `json:"python_tests_passing"`
GoTestsPassing bool `json:"go_tests_passing"`
BenchmarksPassing bool `json:"benchmarks_passing"`
GoldenFixtureCorpus bool `json:"golden_fixture_corpus"`
AllGoGoldenTests bool `json:"all_go_golden_tests"`
NoPythonRuntime bool `json:"no_python_runtime_dependency"`
ParityPassing int `json:"parity_passing"`
ParityTotal int `json:"parity_total"`
SourceTestsPassing int `json:"source_tests_passing"`
Expand Down Expand Up @@ -143,6 +149,9 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {
functional := RatioGate{}
stateDiff := RatioGate{}
behaviorContracts := RatioGate{}
goldenFixtureCorpus := BoolGate{}
allGoGoldenTests := BoolGate{}
noPythonRuntime := BoolGate{}

for scanner.Scan() {
line := scanner.Text()
Expand All @@ -151,7 +160,21 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {
}
if gate, ok := parseGateEvent(line); ok {
eventsSeen++
applyGateEvent(gate, &pythonReference, &surface, &help, &functional, &stateDiff, &behaviorContracts, &knownExceptions, &pythonTests, &benchmarks)
applyGateEvent(
gate,
&pythonReference,
&surface,
&help,
&functional,
&stateDiff,
&behaviorContracts,
&goldenFixtureCorpus,
&allGoGoldenTests,
&noPythonRuntime,
&knownExceptions,
&pythonTests,
&benchmarks,
)
continue
}

Expand All @@ -163,7 +186,21 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {

if ev.Output != "" {
if gate, ok := parseGateEvent(ev.Output); ok {
applyGateEvent(gate, &pythonReference, &surface, &help, &functional, &stateDiff, &behaviorContracts, &knownExceptions, &pythonTests, &benchmarks)
applyGateEvent(
gate,
&pythonReference,
&surface,
&help,
&functional,
&stateDiff,
&behaviorContracts,
&goldenFixtureCorpus,
&allGoGoldenTests,
&noPythonRuntime,
&knownExceptions,
&pythonTests,
&benchmarks,
)
}
if n, ok := approvedExceptionCount(ev.Output); ok && n > knownExceptions {
knownExceptions = n
Expand Down Expand Up @@ -253,6 +290,9 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {
FunctionalContracts: functional.Percent(),
StateDiffContracts: stateDiff.Percent(),
PythonBehaviorContracts: behaviorContracts.Percent(),
GoldenFixtureCorpus: passFail(goldenFixtureCorpus.OK()),
AllGoGoldenTests: passFail(allGoGoldenTests.OK()),
NoPythonRuntime: passFail(noPythonRuntime.OK()),
KnownExceptions: knownExceptions,
GoTests: passFail(goTestsPass),
PythonTests: passFail(pythonTests.OK()),
Expand All @@ -275,6 +315,9 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {
gates.FunctionalContracts == 1.0 &&
gates.StateDiffContracts == 1.0 &&
gates.PythonBehaviorContracts == 1.0 &&
gates.GoldenFixtureCorpus == "pass" &&
gates.AllGoGoldenTests == "pass" &&
gates.NoPythonRuntime == "pass" &&
gates.KnownExceptions == 0 &&
gates.GoTests == "pass" &&
gates.PythonTests == "pass" &&
Expand Down Expand Up @@ -315,6 +358,9 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {
PythonTestsPassing: gates.PythonTests == "pass",
GoTestsPassing: gates.GoTests == "pass",
BenchmarksPassing: gates.Benchmarks == "pass",
GoldenFixtureCorpus: gates.GoldenFixtureCorpus == "pass",
AllGoGoldenTests: gates.AllGoGoldenTests == "pass",
NoPythonRuntime: gates.NoPythonRuntime == "pass",
ParityPassing: metrics.ParityPassing,
ParityTotal: metrics.ParityTotal,
SourceTestsPassing: metrics.SourceTestsPassing,
Expand Down Expand Up @@ -344,6 +390,9 @@ func applyGateEvent(
functional *RatioGate,
stateDiff *RatioGate,
behaviorContracts *RatioGate,
goldenFixtureCorpus *BoolGate,
allGoGoldenTests *BoolGate,
noPythonRuntime *BoolGate,
knownExceptions *int,
pythonTests *BoolGate,
benchmarks *BoolGate,
Expand All @@ -361,6 +410,12 @@ func applyGateEvent(
*stateDiff = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total}
case "python_behavior_contracts":
*behaviorContracts = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total}
case "golden_fixture_corpus":
*goldenFixtureCorpus = BoolGate{Seen: true, Passed: gate.Passed}
case "all_go_golden_tests":
*allGoGoldenTests = BoolGate{Seen: true, Passed: gate.Passed}
case "no_python_runtime_dependency":
*noPythonRuntime = BoolGate{Seen: true, Passed: gate.Passed}
case "known_exceptions":
*knownExceptions = gate.Count
case "python_tests":
Expand Down Expand Up @@ -435,6 +490,9 @@ func gateResults(gates CutoverGates) []GateResult {
{Name: "functional_contracts", Passing: gates.FunctionalContracts == 1.0},
{Name: "state_diff_contracts", Passing: gates.StateDiffContracts == 1.0},
{Name: "python_behavior_contracts", Passing: gates.PythonBehaviorContracts == 1.0},
{Name: "golden_fixture_corpus", Passing: gates.GoldenFixtureCorpus == "pass"},
{Name: "all_go_golden_tests", Passing: gates.AllGoGoldenTests == "pass"},
{Name: "no_python_runtime_dependency", Passing: gates.NoPythonRuntime == "pass"},
{Name: "python_tests_pass", Passing: gates.PythonTests == "pass"},
{Name: "benchmarks_pass", Passing: gates.Benchmarks == "pass"},
{Name: "no_known_exceptions", Passing: gates.KnownExceptions == 0},
Expand Down
34 changes: 25 additions & 9 deletions cmd/apm/CUTOVER.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,12 @@ via PyInstaller packaging and `pip install apm-cli`.
The Go CLI currently implements:
- `apm --help` / `apm --version` (full parity with Python)
- `apm init [--yes] [PROJECT_NAME]` (functional, creates apm.yml)
- Per-command `--help` for all 26 commands (golden-file verified)
- Per-command `--help` for all 26 commands (initial golden-file coverage)

The checked-in `cmd/apm/testdata/golden/` files are the start of the
cutover corpus, not final completion proof. Final completion requires the
full command matrix below to be represented as committed fixtures and replayed
by Go without invoking the Python runtime.

Remaining commands return a "not yet fully implemented" message.

Expand All @@ -28,8 +33,18 @@ are true:
`policy`, `mcp`, `runtime`, `targets`, `list`, `view`, `cache`,
`deps`, `marketplace`, `uninstall`, `prune`
3. Python-vs-Go parity tests pass for all commands in the matrix
4. `go build ./cmd/apm` produces a single static binary
5. CI passes on the crane PR branch (`crane/crane-migration-python-to-go-full-apm-cli-rewrite`)
4. The final Python-reference parity run has been frozen into a committed,
versioned golden fixture corpus. The corpus must include CLI inventory,
help and usage output, error output, exit codes, generated files, lockfiles,
config files, managed-file manifests, deterministic cache/config layout, and
audit artifacts for the full command matrix.
5. An all-Go golden replay passes against that corpus with no live Python
oracle. The replay must build `cmd/apm` and compare only the Go binary
against checked-in fixtures.
6. A no-Python-runtime check passes: `APM_PYTHON_BIN` is unset, the Python CLI
is hidden or unavailable to the replay, and the golden replay still passes.
7. `go build ./cmd/apm` produces a single static binary
8. CI passes on the crane PR branch (`crane/crane-migration-python-to-go-full-apm-cli-rewrite`)

## Cutover Steps

Expand All @@ -46,13 +61,14 @@ When conditions are met:

## Python Compatibility Shim

Until all commands are implemented in Go, the Python CLI remains the
authoritative `apm` command. The Go binary is available as `apm-go`
for testing.
Until all commands are implemented in Go and the golden replay gate passes, the
Python CLI remains the authoritative `apm` command. The Go binary is available
as `apm-go` for testing.

The shim removal plan: once the command matrix passes functional tests,
the Python entrypoint is replaced by the Go binary in the same PR that
passes the final parity tests.
The shim removal plan: once the command matrix passes functional tests, the
final Python-reference behavior is frozen into golden fixtures. Only after the
all-Go replay passes without a Python runtime can the Python entrypoint be
replaced by the Go binary.

## Timeline

Expand Down
29 changes: 16 additions & 13 deletions cmd/apm/cli_parity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,8 @@ func runPython(args ...string) (stdout, stderr string, exitCode int) {
}

// noPython reports whether the Python CLI is unavailable, which it decides by
// checking that pythonBin yields an empty string.
//
// The Python-vs-Go tests in this file call it as a guard: when it reports true
// they log a message and return early, so the test passes without performing
// the comparison. Such a pass is optional progress only; final completion is
// enforced by the explicit scorer gates instead.
func noPython() bool {
	return len(pythonBin()) == 0
}
Expand Down Expand Up @@ -248,10 +248,10 @@ func TestParityCLISelfUpdateAlias(t *testing.T) {
// --- Python-vs-Go parity tests (require APM_PYTHON_BIN) ---

// TestPythonVsGoVersionExitCode compares exit codes for --version.
// When APM_PYTHON_BIN is not set the test passes vacuously (no Python to compare).
// When APM_PYTHON_BIN is not set, this optional comparison is not completion evidence.
func TestPythonVsGoVersionExitCode(t *testing.T) {
if noPython() {
t.Log("APM_PYTHON_BIN not set; skipping Python-vs-Go comparison (vacuous pass)")
t.Log("APM_PYTHON_BIN not set; skipping optional Python-vs-Go comparison")
return
}
_, _, pyCode := runPython("--version")
Expand All @@ -264,7 +264,7 @@ func TestPythonVsGoVersionExitCode(t *testing.T) {
// TestPythonVsGoHelpExitCode compares --help exit codes.
func TestPythonVsGoHelpExitCode(t *testing.T) {
if noPython() {
t.Log("APM_PYTHON_BIN not set; skipping Python-vs-Go comparison (vacuous pass)")
t.Log("APM_PYTHON_BIN not set; skipping optional Python-vs-Go comparison")
return
}
_, _, pyCode := runPython("--help")
Expand All @@ -277,7 +277,7 @@ func TestPythonVsGoHelpExitCode(t *testing.T) {
// TestPythonVsGoUnknownCommandExitCode verifies both fail on unknown cmd.
func TestPythonVsGoUnknownCommandExitCode(t *testing.T) {
if noPython() {
t.Log("APM_PYTHON_BIN not set; skipping Python-vs-Go comparison (vacuous pass)")
t.Log("APM_PYTHON_BIN not set; skipping optional Python-vs-Go comparison")
return
}
_, _, pyCode := runPython("totally-unknown-xyz")
Expand All @@ -290,7 +290,7 @@ func TestPythonVsGoUnknownCommandExitCode(t *testing.T) {
// TestPythonVsGoHelpCommandList verifies Go help lists all Python commands.
func TestPythonVsGoHelpCommandList(t *testing.T) {
if noPython() {
t.Log("APM_PYTHON_BIN not set; skipping Python-vs-Go comparison (vacuous pass)")
t.Log("APM_PYTHON_BIN not set; skipping optional Python-vs-Go comparison")
return
}
pyOut, _, _ := runPython("--help")
Expand Down Expand Up @@ -324,7 +324,7 @@ func TestPythonVsGoHelpCommandList(t *testing.T) {
// TestPythonVsGoSubcommandHelpExitCodes compares <cmd> --help exit codes.
func TestPythonVsGoSubcommandHelpExitCodes(t *testing.T) {
if noPython() {
t.Log("APM_PYTHON_BIN not set; skipping Python-vs-Go comparison (vacuous pass)")
t.Log("APM_PYTHON_BIN not set; skipping optional Python-vs-Go comparison")
return
}
cmds := []string{
Expand Down Expand Up @@ -359,17 +359,20 @@ func goldenDir(t *testing.T) string {
}

// readGolden reads the golden fixture called name from the directory reported
// by goldenDir and returns its contents as a string.
//
// Golden fixtures are cutover evidence, so this helper never lets a caller
// pass without them: if the file cannot be read, or if it contains nothing but
// whitespace, the test is failed immediately with t.Fatalf.
func readGolden(t *testing.T, name string) string {
	t.Helper()
	p := filepath.Join(goldenDir(t), name)
	b, err := os.ReadFile(p)
	if err != nil {
		// No early return of "" here: a missing fixture must stop the test
		// rather than degrade into a comparison against nothing.
		t.Fatalf("golden fixture %s is required but was not found: %v", name, err)
	}
	content := string(b)
	// A whitespace-only fixture would make any comparison meaningless.
	if strings.TrimSpace(content) == "" {
		t.Fatalf("golden fixture %s is empty", name)
	}
	return content
}

// normalizeHelpOutput removes lines that vary between runs or versions:
Expand Down
6 changes: 6 additions & 0 deletions tests/parity/python_contract_coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20203,23 +20203,29 @@ python_tests:
- tests/unit/test_copilot_runtime.py::TestCopilotRuntime::test_str_representation
- tests/unit/test_copilot_runtime.py::TestMcpConfigUtf8RoundTrip::test_get_mcp_servers_reads_non_ascii
- tests/unit/test_crane_scheduler.py::test_completed_state_skips_inactive_migration
- tests/unit/test_crane_scheduler.py::test_issue_label_detection_accepts_github_label_payloads
- tests/unit/test_crane_scheduler.py::test_active_issue_overrides_stale_completed_state
- tests/unit/test_crane_scheduler.py::test_active_issue_does_not_override_pause
- tests/unit/test_crane_scheduler.py::test_machine_state_completed_string_is_recognized
- tests/unit/test_crane_scheduler.py::test_pr_head_gate_fails_when_any_check_is_not_success
- tests/unit/test_crane_scheduler.py::test_pr_head_gate_passes_only_when_all_checks_succeed
- tests/unit/test_crane_score.py::test_crane_score_counts_parity_events
- tests/unit/test_crane_score.py::test_crane_score_applies_target_correctness_gate
- tests/unit/test_crane_score.py::test_crane_score_can_reach_one_with_all_deletion_grade_gates
- tests/unit/test_crane_score.py::test_crane_score_full_parity_but_bad_deletion_gate_cannot_reach_one
- tests/unit/test_crane_score.py::test_crane_score_full_parity_but_missing_deletion_gates_cannot_reach_one
- tests/unit/test_crane_score.py::test_crane_score_full_parity_without_golden_cutover_gates_cannot_reach_one
- tests/unit/test_crane_score.py::test_crane_score_package_level_go_failure_blocks_one
- tests/unit/test_crane_score.py::test_crane_score_rejects_empty_event_stream
- tests/unit/test_crane_score.py::test_crane_score_reaches_one_with_completion_tests_and_explicit_behavior_gate
- tests/unit/test_crane_score.py::test_crane_score_does_not_infer_behavior_contracts_from_test_name
- tests/unit/test_crane_score.py::test_crane_score_blocks_incomplete_behavior_contract_gate
- tests/unit/test_crane_score.py::test_crane_score_blocks_known_exceptions
- tests/unit/test_crane_workflow_prompt.py::test_crane_acceptance_requires_shared_iteration_summary_for_pr_updates
- tests/unit/test_crane_workflow_prompt.py::test_crane_completion_is_two_phase_and_pr_head_gated
- tests/unit/test_crane_workflow_prompt.py::test_crane_commit_guidance_provides_structured_summary_fallback
- tests/unit/test_crane_workflow_prompt.py::test_crane_prompt_blocks_stale_completed_state_from_finishing
- tests/unit/test_crane_workflow_prompt.py::test_crane_state_template_tracks_completion_candidate_gate
- tests/unit/test_cursor_mcp.py::TestCursorClientFactory::test_create_cursor_client
- tests/unit/test_cursor_mcp.py::TestCursorClientFactory::test_create_cursor_client_case_insensitive
- tests/unit/test_cursor_mcp.py::TestCursorClientAdapter::test_config_path_is_repo_local
Expand Down
33 changes: 33 additions & 0 deletions tests/unit/test_crane_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ def _deletion_gates() -> list[str]:
'{"crane":"gate","name":"functional","passing":1,"total":1}',
'{"crane":"gate","name":"state_diff","passing":1,"total":1}',
'{"crane":"gate","name":"python_behavior_contracts","passing":1,"total":1}',
'{"crane":"gate","name":"golden_fixture_corpus","passed":true}',
'{"crane":"gate","name":"all_go_golden_tests","passed":true}',
'{"crane":"gate","name":"no_python_runtime_dependency","passed":true}',
'{"crane":"gate","name":"known_exceptions","count":0}',
'{"crane":"gate","name":"python_tests","passed":true}',
'{"crane":"gate","name":"benchmarks","passed":true}',
Expand Down Expand Up @@ -166,6 +169,9 @@ def test_crane_score_can_reach_one_with_all_deletion_grade_gates() -> None:
"state_diff_contracts": 1.0,
"python_behavior_contracts": 1.0,
"known_exceptions": 0,
"golden_fixture_corpus": "pass",
"all_go_golden_tests": "pass",
"no_python_runtime_dependency": "pass",
"go_tests": "pass",
"python_tests": "pass",
"benchmarks": "pass",
Expand All @@ -181,6 +187,9 @@ def test_crane_score_can_reach_one_with_all_deletion_grade_gates() -> None:
'{"crane":"gate","name":"functional","passing":0,"total":1}',
'{"crane":"gate","name":"state_diff","passing":0,"total":1}',
'{"crane":"gate","name":"python_behavior_contracts","passing":0,"total":1}',
'{"crane":"gate","name":"golden_fixture_corpus","passed":false}',
'{"crane":"gate","name":"all_go_golden_tests","passed":false}',
'{"crane":"gate","name":"no_python_runtime_dependency","passed":false}',
'{"crane":"gate","name":"known_exceptions","count":1}',
'{"crane":"gate","name":"python_tests","passed":false}',
'{"crane":"gate","name":"benchmarks","passed":false}',
Expand All @@ -205,6 +214,27 @@ def test_crane_score_full_parity_but_missing_deletion_gates_cannot_reach_one() -
assert score["deletion_grade_ready"] is False


def test_crane_score_full_parity_without_golden_cutover_gates_cannot_reach_one() -> None:
    """Full parity must not score 1.0 when the golden cutover gates are absent.

    The three golden cutover gate events are filtered out of the deletion-gate
    event list before scoring. The resulting score must stay below 1.0, must
    not be deletion-grade ready, and must report each omitted gate as failing
    both at the top level and in the per-gate results.
    """
    cutover_gate_names = (
        "golden_fixture_corpus",
        "all_go_golden_tests",
        "no_python_runtime_dependency",
    )

    # Keep every deletion-gate event except the three cutover gates.
    remaining_events = []
    for event in _deletion_gates():
        if json.loads(event)["name"] in cutover_gate_names:
            continue
        remaining_events.append(event)

    score = _run_score([*_parity_passes(302), _package_pass(), *remaining_events])
    gate_index = _gates(score)

    assert score["migration_score"] < 1.0
    assert score["deletion_grade_ready"] is False
    for gate_name in cutover_gate_names:
        assert score[gate_name] is False
    for gate_name in cutover_gate_names:
        assert gate_index[gate_name]["passing"] is False


def test_crane_score_package_level_go_failure_blocks_one() -> None:
score = _run_score([*_parity_passes(302), _package_fail(), *_deletion_gates()])

Expand Down Expand Up @@ -242,6 +272,9 @@ def test_crane_score_reaches_one_with_completion_tests_and_explicit_behavior_gat
*_parity_passes(293),
*_completion_gate_events(),
_behavior_contract_gate_output(1, 1),
'{"crane":"gate","name":"golden_fixture_corpus","passed":true}',
'{"crane":"gate","name":"all_go_golden_tests","passed":true}',
'{"crane":"gate","name":"no_python_runtime_dependency","passed":true}',
_package_pass(),
]
)
Expand Down