githubnext · mrjf · Jun 2, 2026 · Jun 2, 2026
diff --git a/.crane/scripts/score.go b/.crane/scripts/score.go
@@ -149,29 +149,9 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {
 		if !strings.HasPrefix(line, "{") {
 			continue
 		}
-		var gate GateEvent
-		if err := json.Unmarshal([]byte(line), &gate); err == nil && gate.Crane == "gate" {
+		if gate, ok := parseGateEvent(line); ok {
 			eventsSeen++
-			switch gate.Name {
-			case "python_reference":
-				pythonReference = BoolGate{Seen: true, Passed: gate.Passed}
-			case "surface":
-				surface = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total}
-			case "help":
-				help = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total}
-			case "functional":
-				functional = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total}
-			case "state_diff":
-				stateDiff = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total}
-			case "python_behavior_contracts":
-				behaviorContracts = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total}
-			case "known_exceptions":
-				knownExceptions = gate.Count
-			case "python_tests":
-				pythonTests = BoolGate{Seen: true, Passed: gate.Passed}
-			case "benchmarks":
-				benchmarks = BoolGate{Seen: true, Passed: gate.Passed}
-			}
+			applyGateEvent(gate, &pythonReference, &surface, &help, &functional, &stateDiff, &behaviorContracts, &knownExceptions, &pythonTests, &benchmarks)
 			continue
 		}
 
@@ -182,6 +162,9 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {
 		eventsSeen++
 
 		if ev.Output != "" {
+			if gate, ok := parseGateEvent(ev.Output); ok {
+				applyGateEvent(gate, &pythonReference, &surface, &help, &functional, &stateDiff, &behaviorContracts, &knownExceptions, &pythonTests, &benchmarks)
+			}
 			if n, ok := approvedExceptionCount(ev.Output); ok && n > knownExceptions {
 				knownExceptions = n
 			}
@@ -253,7 +236,7 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {
 		stateDiff = inferredAnyRatioGate(passed, failed, "TestParityCompletionStateDiffContracts", "TestParityStateDiffContracts")
 	}
 	if !behaviorContracts.Seen {
-		behaviorContracts = inferredAnyRatioGate(passed, failed, "TestParityCompletionPythonBehaviorContracts")
+		behaviorContracts = RatioGate{Seen: true, Passing: 0, Total: 1}
 	}
 	if !pythonTests.Seen {
 		pythonTests = BoolGate{Seen: true, Passed: testPassed(passed, failed, "TestParityCompletionPythonSuite")}
@@ -341,6 +324,52 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {
 	}, nil
 }
 
+// parseGateEvent tries to decode line as a crane gate event.
+//
+// Surrounding whitespace is ignored. The boolean result is true only when the
+// trimmed text starts with "{", unmarshals as JSON into a GateEvent, and its
+// Crane field equals "gate". In every other case a zero GateEvent and false
+// are returned.
+func parseGateEvent(line string) (GateEvent, bool) {
+	trimmed := strings.TrimSpace(line)
+	if !strings.HasPrefix(trimmed, "{") {
+		// Text that does not open a JSON object is rejected without decoding.
+		return GateEvent{}, false
+	}
+
+	var event GateEvent
+	err := json.Unmarshal([]byte(trimmed), &event)
+	if err == nil && event.Crane == "gate" {
+		return event, true
+	}
+	return GateEvent{}, false
+}
+
+// applyGateEvent records one gate event in the destination selected by gate.Name.
+//
+// Boolean gates (python_reference, python_tests, benchmarks) are overwritten
+// with Seen=true and the event's Passed value. Ratio gates (surface, help,
+// functional, state_diff, python_behavior_contracts) are overwritten with
+// Seen=true and the event's Passing/Total values. known_exceptions stores the
+// event's Count. Any other name leaves every destination untouched.
+func applyGateEvent(
+	gate GateEvent,
+	pythonReference *BoolGate,
+	surface *RatioGate,
+	help *RatioGate,
+	functional *RatioGate,
+	stateDiff *RatioGate,
+	behaviorContracts *RatioGate,
+	knownExceptions *int,
+	pythonTests *BoolGate,
+	benchmarks *BoolGate,
+) {
+	// Both candidate values are built up front; the switch only picks where to store one.
+	boolResult := BoolGate{Seen: true, Passed: gate.Passed}
+	ratioResult := RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total}
+
+	switch gate.Name {
+	// Pass/fail gates.
+	case "python_reference":
+		*pythonReference = boolResult
+	case "python_tests":
+		*pythonTests = boolResult
+	case "benchmarks":
+		*benchmarks = boolResult
+
+	// Passing-out-of-total gates.
+	case "surface":
+		*surface = ratioResult
+	case "help":
+		*help = ratioResult
+	case "functional":
+		*functional = ratioResult
+	case "state_diff":
+		*stateDiff = ratioResult
+	case "python_behavior_contracts":
+		*behaviorContracts = ratioResult
+
+	// Plain counter.
+	case "known_exceptions":
+		*knownExceptions = gate.Count
+	}
+}
+
 // isTargetPackage reports whether pkg begins with the
 // "github.com/githubnext/apm/" import-path prefix (trailing slash included).
 func isTargetPackage(pkg string) bool {
 	const modulePrefix = "github.com/githubnext/apm/"
 	return strings.HasPrefix(pkg, modulePrefix)
 }

diff --git a/.github/workflows/migration-ci.yml b/.github/workflows/migration-ci.yml
@@ -111,6 +111,7 @@ jobs:
           uv run python scripts/ci/python_behavior_contracts.py check \
             --inventory "$RUNNER_TEMP/python-behavior-contracts.json" \
             --coverage tests/parity/python_contract_coverage.yml \
+            --allow-intentionally-incomplete \
             --summary "$RUNNER_TEMP/python-contract-coverage.md" || true
           python - "$RUNNER_TEMP/migration-score.json" <<'PY'
           import json

diff --git a/cmd/apm/python_behavior_contracts_test.go b/cmd/apm/python_behavior_contracts_test.go
@@ -2,6 +2,7 @@ package main
 
 import (
 	"encoding/json"
+	"fmt"
 	"os"
 	"os/exec"
 	"path/filepath"
@@ -108,6 +109,10 @@ func normalizeContractHelp(text string) string {
 	return strings.TrimRight(strings.Join(lines, "\n"), "\n")
 }
 
+// emitCraneRatioGate prints one crane ratio gate event to stdout as a single
+// line of JSON with the fields crane ("gate"), name, passing and total.
+//
+// name is the gate identifier; passing and total are written as JSON integers.
+func emitCraneRatioGate(name string, passing, total int) {
+	// fmt's %q produces a Go string literal, whose escapes (\a, \v, \x7f,
+	// \U0001f600, ...) are not legal JSON. Encode the name with encoding/json so
+	// the emitted line is valid JSON for any name.
+	quoted, err := json.Marshal(name)
+	if err != nil {
+		// Not expected for a string value; keep the previous quoting rather than drop the event.
+		quoted = []byte(fmt.Sprintf("%q", name))
+	}
+	fmt.Printf("{\"crane\":\"gate\",\"name\":%s,\"passing\":%d,\"total\":%d}\n", quoted, passing, total)
+}
+
 func TestParityPythonCommandSurfaceFromSource(t *testing.T) {
 	inv := loadPythonBehaviorInventory(t, false)
 	if len(inv.Commands) == 0 {
@@ -190,6 +195,12 @@ func TestParityCompletionPythonBehaviorContracts(t *testing.T) {
 	check.Env = append(os.Environ(), "NO_COLOR=1", "COLUMNS=10000")
 	out, err := check.CombinedOutput()
 	if err != nil {
-		t.Fatalf("Python behavior contracts are not fully covered:\n%s", string(out))
+		emitCraneRatioGate("python_behavior_contracts", 0, 1)
+		if os.Getenv("APM_ENFORCE_PYTHON_BEHAVIOR_CONTRACTS") == "1" {
+			t.Fatalf("Python behavior contracts are not fully covered:\n%s", string(out))
+		}
+		t.Logf("Python behavior contracts are not fully covered; migration remains incomplete:\n%s", string(out))
+		return
 	}
+	emitCraneRatioGate("python_behavior_contracts", 1, 1)
 }
diff --git a/scripts/ci/python_behavior_contracts.py b/scripts/ci/python_behavior_contracts.py
@@ -404,7 +404,16 @@ def cmd_check(args: argparse.Namespace) -> int:
         Path(args.summary).write_text(summary, encoding="utf-8")
     print(summary)
     if coverage.get("status") == "intentionally-incomplete":
-        # Manifest explicitly declared incomplete; report findings without failing.
+        if not args.allow_intentionally_incomplete:
+            print(
+                "coverage manifest declares status: intentionally-incomplete; "
+                "remove that status only after all findings are resolved",
+                file=sys.stderr,
+            )
+            return 1
+        # Report-only mode for progress summaries. Completion checks must not
+        # use this flag, because an intentionally incomplete manifest is not
+        # deletion-grade evidence.
         return 0
     return 1 if findings else 0
 
@@ -425,6 +434,11 @@ def main(argv: list[str] | None = None) -> int:
         help="coverage manifest path",
     )
     check.add_argument("--summary", help="write markdown coverage summary to this path")
+    check.add_argument(
+        "--allow-intentionally-incomplete",
+        action="store_true",
+        help="report findings without failing when the manifest is marked incomplete",
+    )
     check.set_defaults(func=cmd_check)
 
     args = parser.parse_args(argv)

diff --git a/tests/parity/README.md b/tests/parity/README.md
@@ -15,3 +15,8 @@ tests.
 contracts to parity evidence. The completion scorer must not reach
 `migration_score = 1.0` while any extracted command or Python test lacks mapped
 coverage.
+
+`status: intentionally-incomplete` is a progress marker only. A manifest that
+carries it must fail completion scoring; pass `--allow-intentionally-incomplete`
+only for report-only summaries. Set `APM_ENFORCE_PYTHON_BEHAVIOR_CONTRACTS=1`
+when a local or CI check should hard-fail instead of reporting incomplete progress.
diff --git a/tests/parity/test_python_behavior_contracts.py b/tests/parity/test_python_behavior_contracts.py
@@ -142,6 +142,14 @@ def test_every_python_command_rejects_unknown_option_consistently(
 def test_python_contract_coverage_manifest_is_complete(inventory: dict[str, object]) -> None:
     coverage = _load_coverage(ROOT / "tests" / "parity" / "python_contract_coverage.yml")
     if coverage.get("status") == "intentionally-incomplete":
-        pytest.skip("Coverage manifest is intentionally incomplete; remove status field to enforce")
+        if os.environ.get("APM_ENFORCE_PYTHON_BEHAVIOR_CONTRACTS") != "1":
+            pytest.xfail(
+                "Coverage manifest is intentionally incomplete; completion gate "
+                "is reported by migration_score"
+            )
+        pytest.fail(
+            "Coverage manifest is intentionally incomplete; remove status field "
+            "only after all contracts are mapped"
+        )
     findings = check_coverage(inventory, coverage)
     assert not findings, render_summary(inventory, findings)
diff --git a/tests/unit/test_crane_score.py b/tests/unit/test_crane_score.py
@@ -95,6 +95,22 @@ def _completion_gate_events() -> list[str]:
     return [line for test in tests for line in _go_pass(test)]
 
 
+def _behavior_contract_gate_output(passing: int, total: int) -> str:
+    return _event(
+        "output",
+        "TestParityCompletionPythonBehaviorContracts",
+        output=json.dumps(
+            {
+                "crane": "gate",
+                "name": "python_behavior_contracts",
+                "passing": passing,
+                "total": total,
+            }
+        )
+        + "\n",
+    )
+
+
 def _gates(score: dict[str, object]) -> dict[str, dict[str, object]]:
     gates = score["gates"]
     assert isinstance(gates, list)
@@ -220,15 +236,49 @@ def test_crane_score_rejects_empty_event_stream() -> None:
     assert "empty or incomplete" in result.stderr
 
 
-def test_crane_score_infers_cutover_gates_from_completion_tests() -> None:
-    score = _run_score([*_parity_passes(293), *_completion_gate_events(), _package_pass()])
+def test_crane_score_reaches_one_with_completion_tests_and_explicit_behavior_gate() -> None:
+    score = _run_score(
+        [
+            *_parity_passes(293),
+            *_completion_gate_events(),
+            _behavior_contract_gate_output(1, 1),
+            _package_pass(),
+        ]
+    )
 
     assert score["migration_score"] == 1.0
     assert score["progress"] == 1.0
     assert score["deletion_grade_ready"] is True
     assert all(gate["passing"] for gate in _gates(score).values())
 
 
+def test_crane_score_does_not_infer_behavior_contracts_from_test_name() -> None:
+    score = _run_score([*_parity_passes(293), *_completion_gate_events(), _package_pass()])
+    gates = _gates(score)
+
+    assert score["progress"] == 1.0
+    assert score["migration_score"] < 1.0
+    assert score["deletion_grade_ready"] is False
+    assert gates["python_behavior_contracts"]["passing"] is False
+
+
+def test_crane_score_blocks_incomplete_behavior_contract_gate() -> None:
+    score = _run_score(
+        [
+            *_parity_passes(293),
+            *_completion_gate_events(),
+            _behavior_contract_gate_output(0, 1),
+            _package_pass(),
+        ]
+    )
+    gates = _gates(score)
+
+    assert score["progress"] == 1.0
+    assert score["migration_score"] < 1.0
+    assert score["deletion_grade_ready"] is False
+    assert gates["python_behavior_contracts"]["passing"] is False
+
+
 def test_crane_score_blocks_known_exceptions() -> None:
     score = _run_score(
         [