diff --git a/.crane/scripts/score.go b/.crane/scripts/score.go index 130ade26..da3d1a4e 100644 --- a/.crane/scripts/score.go +++ b/.crane/scripts/score.go @@ -149,29 +149,9 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) { if !strings.HasPrefix(line, "{") { continue } - var gate GateEvent - if err := json.Unmarshal([]byte(line), &gate); err == nil && gate.Crane == "gate" { + if gate, ok := parseGateEvent(line); ok { eventsSeen++ - switch gate.Name { - case "python_reference": - pythonReference = BoolGate{Seen: true, Passed: gate.Passed} - case "surface": - surface = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total} - case "help": - help = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total} - case "functional": - functional = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total} - case "state_diff": - stateDiff = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total} - case "python_behavior_contracts": - behaviorContracts = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total} - case "known_exceptions": - knownExceptions = gate.Count - case "python_tests": - pythonTests = BoolGate{Seen: true, Passed: gate.Passed} - case "benchmarks": - benchmarks = BoolGate{Seen: true, Passed: gate.Passed} - } + applyGateEvent(gate, &pythonReference, &surface, &help, &functional, &stateDiff, &behaviorContracts, &knownExceptions, &pythonTests, &benchmarks) continue } @@ -182,6 +162,9 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) { eventsSeen++ if ev.Output != "" { + if gate, ok := parseGateEvent(ev.Output); ok { + applyGateEvent(gate, &pythonReference, &surface, &help, &functional, &stateDiff, &behaviorContracts, &knownExceptions, &pythonTests, &benchmarks) + } if n, ok := approvedExceptionCount(ev.Output); ok && n > knownExceptions { knownExceptions = n } @@ -253,7 +236,7 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) { stateDiff = inferredAnyRatioGate(passed, failed, "TestParityCompletionStateDiffContracts", "TestParityStateDiffContracts") } if !behaviorContracts.Seen { - behaviorContracts = inferredAnyRatioGate(passed, failed, "TestParityCompletionPythonBehaviorContracts") + behaviorContracts = RatioGate{Seen: true, Passing: 0, Total: 1} } if !pythonTests.Seen { pythonTests = BoolGate{Seen: true, Passed: testPassed(passed, failed, "TestParityCompletionPythonSuite")} @@ -341,6 +324,52 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) { }, nil } +func parseGateEvent(line string) (GateEvent, bool) { + line = strings.TrimSpace(line) + if !strings.HasPrefix(line, "{") { + return GateEvent{}, false + } + var gate GateEvent + if err := json.Unmarshal([]byte(line), &gate); err != nil || gate.Crane != "gate" { + return GateEvent{}, false + } + return gate, true +} + +func applyGateEvent( + gate GateEvent, + pythonReference *BoolGate, + surface *RatioGate, + help *RatioGate, + functional *RatioGate, + stateDiff *RatioGate, + behaviorContracts *RatioGate, + knownExceptions *int, + pythonTests *BoolGate, + benchmarks *BoolGate, +) { + switch gate.Name { + case "python_reference": + *pythonReference = BoolGate{Seen: true, Passed: gate.Passed} + case "surface": + *surface = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total} + case "help": + *help = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total} + case "functional": + *functional = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total} + case "state_diff": + *stateDiff = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total} + case "python_behavior_contracts": + *behaviorContracts = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total} + case "known_exceptions": + *knownExceptions = gate.Count + case "python_tests": + *pythonTests = BoolGate{Seen: true, Passed: gate.Passed} + case "benchmarks": + *benchmarks = BoolGate{Seen: true, Passed: gate.Passed} + } +} + func isTargetPackage(pkg string) bool { return strings.HasPrefix(pkg, "github.com/githubnext/apm/") } diff --git a/.github/workflows/migration-ci.yml b/.github/workflows/migration-ci.yml index ba4b65b0..3a3d197e 100644 --- a/.github/workflows/migration-ci.yml +++ b/.github/workflows/migration-ci.yml @@ -111,6 +111,7 @@ jobs: uv run python scripts/ci/python_behavior_contracts.py check \ --inventory "$RUNNER_TEMP/python-behavior-contracts.json" \ --coverage tests/parity/python_contract_coverage.yml \ + --allow-intentionally-incomplete \ --summary "$RUNNER_TEMP/python-contract-coverage.md" || true python - "$RUNNER_TEMP/migration-score.json" <<'PY' import json diff --git a/cmd/apm/python_behavior_contracts_test.go b/cmd/apm/python_behavior_contracts_test.go index b1a436c4..33559297 100644 --- a/cmd/apm/python_behavior_contracts_test.go +++ b/cmd/apm/python_behavior_contracts_test.go @@ -2,6 +2,7 @@ package main import ( "encoding/json" + "fmt" "os" "os/exec" "path/filepath" @@ -108,6 +109,10 @@ func normalizeContractHelp(text string) string { return strings.TrimRight(strings.Join(lines, "\n"), "\n") } +func emitCraneRatioGate(name string, passing, total int) { + fmt.Printf("{\"crane\":\"gate\",\"name\":%q,\"passing\":%d,\"total\":%d}\n", name, passing, total) +} + func TestParityPythonCommandSurfaceFromSource(t *testing.T) { inv := loadPythonBehaviorInventory(t, false) if len(inv.Commands) == 0 { @@ -190,6 +195,12 @@ func TestParityCompletionPythonBehaviorContracts(t *testing.T) { check.Env = append(os.Environ(), "NO_COLOR=1", "COLUMNS=10000") out, err := check.CombinedOutput() if err != nil { - t.Fatalf("Python behavior contracts are not fully covered:\n%s", string(out)) + emitCraneRatioGate("python_behavior_contracts", 0, 1) + if os.Getenv("APM_ENFORCE_PYTHON_BEHAVIOR_CONTRACTS") == "1" { + t.Fatalf("Python behavior contracts are not fully covered:\n%s", string(out)) + } + t.Logf("Python behavior contracts are not fully covered; migration remains incomplete:\n%s", string(out)) + return } + emitCraneRatioGate("python_behavior_contracts", 1, 1) } diff --git a/scripts/ci/python_behavior_contracts.py b/scripts/ci/python_behavior_contracts.py index 93693f23..56cd982e 100644 --- a/scripts/ci/python_behavior_contracts.py +++ b/scripts/ci/python_behavior_contracts.py @@ -404,7 +404,16 @@ def cmd_check(args: argparse.Namespace) -> int: Path(args.summary).write_text(summary, encoding="utf-8") print(summary) if coverage.get("status") == "intentionally-incomplete": - # Manifest explicitly declared incomplete; report findings without failing. + if not args.allow_intentionally_incomplete: + print( + "coverage manifest declares status: intentionally-incomplete; " + "remove that status only after all findings are resolved", + file=sys.stderr, + ) + return 1 + # Report-only mode for progress summaries. Completion checks must not + # use this flag, because an intentionally incomplete manifest is not + # deletion-grade evidence. return 0 return 1 if findings else 0 @@ -425,6 +434,11 @@ def main(argv: list[str] | None = None) -> int: help="coverage manifest path", ) check.add_argument("--summary", help="write markdown coverage summary to this path") + check.add_argument( + "--allow-intentionally-incomplete", + action="store_true", + help="report findings without failing when the manifest is marked incomplete", + ) check.set_defaults(func=cmd_check) args = parser.parse_args(argv) diff --git a/tests/parity/README.md b/tests/parity/README.md index bcd95d61..5ac78fb9 100644 --- a/tests/parity/README.md +++ b/tests/parity/README.md @@ -15,3 +15,8 @@ tests. contracts to parity evidence. The completion scorer must not reach `migration_score = 1.0` while any extracted command or Python test lacks mapped coverage. + +`status: intentionally-incomplete` is a progress marker only. It must make +completion scoring fail; use `--allow-intentionally-incomplete` only for +report-only summaries. Set `APM_ENFORCE_PYTHON_BEHAVIOR_CONTRACTS=1` when a +local or CI check should hard-fail instead of reporting incomplete progress. diff --git a/tests/parity/test_python_behavior_contracts.py b/tests/parity/test_python_behavior_contracts.py index 5eb9d559..7926bf1a 100644 --- a/tests/parity/test_python_behavior_contracts.py +++ b/tests/parity/test_python_behavior_contracts.py @@ -142,6 +142,14 @@ def test_every_python_command_rejects_unknown_option_consistently( def test_python_contract_coverage_manifest_is_complete(inventory: dict[str, object]) -> None: coverage = _load_coverage(ROOT / "tests" / "parity" / "python_contract_coverage.yml") if coverage.get("status") == "intentionally-incomplete": - pytest.skip("Coverage manifest is intentionally incomplete; remove status field to enforce") + if os.environ.get("APM_ENFORCE_PYTHON_BEHAVIOR_CONTRACTS") != "1": + pytest.xfail( + "Coverage manifest is intentionally incomplete; completion gate " + "is reported by migration_score" + ) + pytest.fail( + "Coverage manifest is intentionally incomplete; remove status field " + "only after all contracts are mapped" + ) findings = check_coverage(inventory, coverage) assert not findings, render_summary(inventory, findings) diff --git a/tests/unit/test_crane_score.py b/tests/unit/test_crane_score.py index 7822b1c8..55f20713 100644 --- a/tests/unit/test_crane_score.py +++ b/tests/unit/test_crane_score.py @@ -95,6 +95,22 @@ def _completion_gate_events() -> list[str]: return [line for test in tests for line in _go_pass(test)] +def _behavior_contract_gate_output(passing: int, total: int) -> str: + return _event( + "output", + "TestParityCompletionPythonBehaviorContracts", + output=json.dumps( + { + "crane": "gate", + "name": "python_behavior_contracts", + "passing": passing, + "total": total, + } + ) + + "\n", + ) + + def _gates(score: dict[str, object]) -> dict[str, dict[str, object]]: gates = score["gates"] assert isinstance(gates, list) @@ -220,8 +236,15 @@ def test_crane_score_rejects_empty_event_stream() -> None: assert "empty or incomplete" in result.stderr -def test_crane_score_infers_cutover_gates_from_completion_tests() -> None: - score = _run_score([*_parity_passes(293), *_completion_gate_events(), _package_pass()]) +def test_crane_score_reaches_one_with_completion_tests_and_explicit_behavior_gate() -> None: + score = _run_score( + [ + *_parity_passes(293), + *_completion_gate_events(), + _behavior_contract_gate_output(1, 1), + _package_pass(), + ] + ) assert score["migration_score"] == 1.0 assert score["progress"] == 1.0 @@ -229,6 +252,33 @@ def test_crane_score_infers_cutover_gates_from_completion_tests() -> None: assert all(gate["passing"] for gate in _gates(score).values()) +def test_crane_score_does_not_infer_behavior_contracts_from_test_name() -> None: + score = _run_score([*_parity_passes(293), *_completion_gate_events(), _package_pass()]) + gates = _gates(score) + + assert score["progress"] == 1.0 + assert score["migration_score"] < 1.0 + assert score["deletion_grade_ready"] is False + assert gates["python_behavior_contracts"]["passing"] is False + + +def test_crane_score_blocks_incomplete_behavior_contract_gate() -> None: + score = _run_score( + [ + *_parity_passes(293), + *_completion_gate_events(), + _behavior_contract_gate_output(0, 1), + _package_pass(), + ] + ) + gates = _gates(score) + + assert score["progress"] == 1.0 + assert score["migration_score"] < 1.0 + assert score["deletion_grade_ready"] is False + assert gates["python_behavior_contracts"]["passing"] is False + + def test_crane_score_blocks_known_exceptions() -> None: score = _run_score( [