diff --git a/packages/specfact-code-review/module-package.yaml b/packages/specfact-code-review/module-package.yaml index 85ae462..68caac3 100644 --- a/packages/specfact-code-review/module-package.yaml +++ b/packages/specfact-code-review/module-package.yaml @@ -1,5 +1,5 @@ name: nold-ai/specfact-code-review -version: 0.46.1 +version: 0.46.4 commands: - code tier: official @@ -23,5 +23,5 @@ description: Official SpecFact code review bundle package. category: codebase bundle_group_command: code integrity: - checksum: sha256:13a5a27fc13e2db6d36573b42482a1fe9d294e100738f6150ad0468868581dd1 - signature: ulRchbUE1q1JhCuTRUPUolICZMiWDBCQgNmNt4YNbtyRysXMhiIcge/KJ1X1VqKtzWu4lzvEDf0OInPJ2lRxBA== + checksum: sha256:ee7d224ac2a7894cc67d3e052764137b034e089624d5007989cab818839e5449 + signature: V+GNklTfgmdYKDWgp53SDw4s1R5GE1UF/745CVnXFJ0v3WSCWLY8APqyuabetBU6/Z1UQ1lKfEiRiZOFJw7WBg== diff --git a/packages/specfact-code-review/src/specfact_code_review/run/runner.py b/packages/specfact-code-review/src/specfact_code_review/run/runner.py index 4ccd86f..53426cc 100644 --- a/packages/specfact-code-review/src/specfact_code_review/run/runner.py +++ b/packages/specfact-code-review/src/specfact_code_review/run/runner.py @@ -301,6 +301,23 @@ def _is_empty_init_file(source_file: Path) -> bool: return stripped_content in ("", "pass") +def _is_coverage_omitted_init_by_project_policy(source_file: Path) -> bool: + """True when repo coverage omits this file (``pyproject.toml`` ``[tool.coverage.run]`` ``omit``). + + ``src/**/__init__.py`` and ``packages/**/__init__.py`` are omitted from coverage; the pytest-cov + JSON report therefore has no ``percent_covered`` for them — not a TDD gap. + """ + try: + path = source_file if source_file.is_absolute() else (Path.cwd() / source_file).resolve() + rel = path.relative_to(Path.cwd().resolve()) + except (ValueError, OSError): + rel = source_file + if rel.name != "__init__.py": + return False + parts = rel.parts + return len(parts) >= 2 and parts[0] in ("src", "packages") + + def _coverage_findings( source_files: list[Path], coverage_payload: dict[str, object], @@ -312,6 +329,8 @@ def _coverage_findings( if percent_covered is None: if source_file.name == "__init__.py" and _is_empty_init_file(source_file): continue # Exempt empty __init__.py files + if _is_coverage_omitted_init_by_project_policy(source_file): + continue return [ tool_error( tool="pytest", diff --git a/packages/specfact-code-review/src/specfact_code_review/tools/__init__.py b/packages/specfact-code-review/src/specfact_code_review/tools/__init__.py index 85e7b19..6911d4c 100644 --- a/packages/specfact-code-review/src/specfact_code_review/tools/__init__.py +++ b/packages/specfact-code-review/src/specfact_code_review/tools/__init__.py @@ -6,10 +6,11 @@ from specfact_code_review.tools.pylint_runner import run_pylint from specfact_code_review.tools.radon_runner import run_radon from specfact_code_review.tools.ruff_runner import run_ruff -from specfact_code_review.tools.semgrep_runner import run_semgrep +from specfact_code_review.tools.semgrep_runner import find_semgrep_config, run_semgrep __all__ = [ + "find_semgrep_config", "run_ast_clean_code", "run_basedpyright", "run_contract_check", diff --git a/packages/specfact-code-review/src/specfact_code_review/tools/semgrep_runner.py b/packages/specfact-code-review/src/specfact_code_review/tools/semgrep_runner.py index 0fc7f2a..7118cd7 100644 --- a/packages/specfact-code-review/src/specfact_code_review/tools/semgrep_runner.py +++ b/packages/specfact-code-review/src/specfact_code_review/tools/semgrep_runner.py @@ -26,6 +26,8 @@ } SEMGREP_TIMEOUT_SECONDS = 90 SEMGREP_RETRY_ATTEMPTS = 2 +_SEMGREP_STDERR_SNIP_MAX = 4000 +_MAX_CONFIG_PARENT_WALK = 32 SemgrepCategory = Literal["clean_code", "architecture", "naming"] @@ -73,15 +75,53 @@ def _normalize_rule_id(rule: str) -> str: return rule -def _resolve_config_path() -> Path: - package_root = Path(__file__).resolve().parents[3] - config_path = package_root / ".semgrep" / "clean_code.yaml" - if config_path.exists(): - return config_path - raise FileNotFoundError(f"Semgrep config not found at {config_path}") +def _is_bundle_boundary(directory: Path) -> bool: + """True when ``directory`` is a sensible workspace root (do not search parents above it).""" + return (directory / ".git").exists() or (directory / "module-package.yaml").is_file() -def _run_semgrep_command(files: list[Path]) -> subprocess.CompletedProcess[str]: +@beartype +@require(lambda bundle_root, module_file: bundle_root is None or isinstance(bundle_root, Path)) +@require(lambda bundle_root, module_file: module_file is None or isinstance(module_file, Path)) +@ensure(lambda result: isinstance(result, Path), "result must be a Path") +def find_semgrep_config( + *, + bundle_root: Path | None = None, + module_file: Path | None = None, +) -> Path: + """Locate ``.semgrep/clean_code.yaml`` for this package or an explicit bundle root. + + When ``bundle_root`` is set, only ``bundle_root/.semgrep/clean_code.yaml`` is considered. + + Otherwise walk parents of ``module_file`` (default: this module), checking each directory for + ``.semgrep/clean_code.yaml``. Stop ascending when a bundle boundary is hit (``.git`` or + ``module-package.yaml``) so unrelated trees (e.g. a stray ``~/.semgrep``) are never used. + """ + if bundle_root is not None: + br = bundle_root.resolve() + candidate = br / ".semgrep" / "clean_code.yaml" + if candidate.is_file(): + return candidate + raise FileNotFoundError(f"Semgrep config not found at {candidate} (bundle_root={br})") + + here = (module_file if module_file is not None else Path(__file__)).resolve() + for depth, parent in enumerate([here.parent, *here.parents]): + if depth > _MAX_CONFIG_PARENT_WALK: + break + if parent == parent.parent: + break + candidate = parent / ".semgrep" / "clean_code.yaml" + if candidate.is_file(): + return candidate + if _is_bundle_boundary(parent): + break + # Installed wheels live under site-packages; do not walk into lib/, $HOME, or other trees. + if parent.name == "site-packages": + break + raise FileNotFoundError(f"Semgrep config not found (no .semgrep/clean_code.yaml under bundle for {here})") + + +def _run_semgrep_command(files: list[Path], *, bundle_root: Path | None) -> subprocess.CompletedProcess[str]: with tempfile.TemporaryDirectory(prefix="semgrep-home-") as temp_home: semgrep_home = Path(temp_home) semgrep_log_dir = semgrep_home / ".semgrep" @@ -96,8 +136,9 @@ def _run_semgrep_command(files: list[Path]) -> subprocess.CompletedProcess[str]: [ "semgrep", "--disable-version-check", + "--quiet", "--config", - str(_resolve_config_path()), + str(find_semgrep_config(bundle_root=bundle_root)), "--json", *(str(file_path) for file_path in files), ], @@ -109,12 +150,24 @@ def _run_semgrep_command(files: list[Path]) -> subprocess.CompletedProcess[str]: ) -def _load_semgrep_results(files: list[Path]) -> list[object]: +def _snip_stderr_tail(stderr: str) -> str: + """Keep the last ``_SEMGREP_STDERR_SNIP_MAX`` chars so the most recent diagnostics stay visible.""" + err_raw = stderr.strip() + if len(err_raw) <= _SEMGREP_STDERR_SNIP_MAX: + return err_raw + return "…" + err_raw[-_SEMGREP_STDERR_SNIP_MAX:] + + +def _load_semgrep_results(files: list[Path], *, bundle_root: Path | None) -> list[object]: last_error: Exception | None = None for _attempt in range(SEMGREP_RETRY_ATTEMPTS): try: - result = _run_semgrep_command(files) - return _parse_semgrep_results(json.loads(result.stdout)) + result = _run_semgrep_command(files, bundle_root=bundle_root) + raw_out = result.stdout.strip() + if not raw_out: + err_tail = _snip_stderr_tail(result.stderr or "") + raise ValueError(f"semgrep returned empty stdout (returncode={result.returncode}); stderr={err_tail!r}") + return _parse_semgrep_results(json.loads(raw_out)) except (FileNotFoundError, OSError, ValueError, json.JSONDecodeError, subprocess.TimeoutExpired) as exc: last_error = exc if last_error is None: @@ -189,13 +242,20 @@ def _finding_from_result(item: dict[str, object], *, allowed_paths: set[str]) -> lambda result: all(isinstance(finding, ReviewFinding) for finding in result), "result must contain ReviewFinding instances", ) -def run_semgrep(files: list[Path]) -> list[ReviewFinding]: - """Run Semgrep for the provided files and map findings into ReviewFinding records.""" +def run_semgrep(files: list[Path], *, bundle_root: Path | None = None) -> list[ReviewFinding]: + """Run Semgrep for the provided files and map findings into ReviewFinding records. + + Args: + files: Paths to scan. + bundle_root: Optional directory that contains ``.semgrep/clean_code.yaml`` (e.g. extracted + bundle root). When omitted, config is resolved from this package upward until a bundle + boundary (``.git`` or ``module-package.yaml``). + """ if not files: return [] try: - raw_results = _load_semgrep_results(files) + raw_results = _load_semgrep_results(files, bundle_root=bundle_root) except (FileNotFoundError, OSError, ValueError, json.JSONDecodeError, subprocess.TimeoutExpired) as exc: return _tool_error(files[0], f"Unable to parse Semgrep output: {exc}") diff --git a/registry/index.json b/registry/index.json index 54459f6..935d33a 100644 --- a/registry/index.json +++ b/registry/index.json @@ -78,9 +78,9 @@ }, { "id": "nold-ai/specfact-code-review", - "latest_version": "0.46.1", - "download_url": "modules/specfact-code-review-0.46.1.tar.gz", - "checksum_sha256": "cbd954a055dfa83bab76880350f3bdc5652046ed4527d0c0ff0d1550e51be1af", + "latest_version": "0.46.4", + "download_url": "modules/specfact-code-review-0.46.4.tar.gz", + "checksum_sha256": "caecd26d6e6308ed88047385e0a9579c5336665d46e8118c3ae9caf4cbd786c8", "core_compatibility": ">=0.44.0,<1.0.0", "tier": "official", "publisher": { diff --git a/registry/modules/specfact-code-review-0.46.3.tar.gz b/registry/modules/specfact-code-review-0.46.3.tar.gz new file mode 100644 index 0000000..db33048 Binary files /dev/null and b/registry/modules/specfact-code-review-0.46.3.tar.gz differ diff --git a/registry/modules/specfact-code-review-0.46.3.tar.gz.sha256 b/registry/modules/specfact-code-review-0.46.3.tar.gz.sha256 new file mode 100644 index 0000000..b9a4deb --- /dev/null +++ b/registry/modules/specfact-code-review-0.46.3.tar.gz.sha256 @@ -0,0 +1 @@ +58f3f9b01a13247b8109f856cb96abfd955e21a70ffafa37ceebf587d402888e diff --git a/registry/modules/specfact-code-review-0.46.4.tar.gz b/registry/modules/specfact-code-review-0.46.4.tar.gz new file mode 100644 index 0000000..49d7f37 Binary files /dev/null and b/registry/modules/specfact-code-review-0.46.4.tar.gz differ diff --git a/registry/modules/specfact-code-review-0.46.4.tar.gz.sha256 b/registry/modules/specfact-code-review-0.46.4.tar.gz.sha256 new file mode 100644 index 0000000..fc9c6fe --- /dev/null +++ b/registry/modules/specfact-code-review-0.46.4.tar.gz.sha256 @@ -0,0 +1 @@ +caecd26d6e6308ed88047385e0a9579c5336665d46e8118c3ae9caf4cbd786c8 diff --git a/registry/signatures/specfact-code-review-0.46.3.tar.sig b/registry/signatures/specfact-code-review-0.46.3.tar.sig new file mode 100644 index 0000000..a5c1461 --- /dev/null +++ b/registry/signatures/specfact-code-review-0.46.3.tar.sig @@ -0,0 +1 @@ +qAX9yIBO+UBsqa4ODAym4YrtWlGhSoyiIb4b/UUWKKN+vv8yNentvwR8mHd32I7BnF+TBiHHmonH+ktPV9XSCQ== diff --git a/registry/signatures/specfact-code-review-0.46.4.tar.sig b/registry/signatures/specfact-code-review-0.46.4.tar.sig new file mode 100644 index 0000000..5d58a76 --- /dev/null +++ b/registry/signatures/specfact-code-review-0.46.4.tar.sig @@ -0,0 +1 @@ +V+GNklTfgmdYKDWgp53SDw4s1R5GE1UF/745CVnXFJ0v3WSCWLY8APqyuabetBU6/Z1UQ1lKfEiRiZOFJw7WBg== diff --git a/tests/unit/specfact_code_review/run/test_runner.py b/tests/unit/specfact_code_review/run/test_runner.py index 01df32e..2bb18fb 100644 --- a/tests/unit/specfact_code_review/run/test_runner.py +++ b/tests/unit/specfact_code_review/run/test_runner.py @@ -471,15 +471,14 @@ def test_coverage_findings_skips_package_initializers_without_coverage_data() -> assert coverage_by_source == {} -def test_coverage_findings_does_not_skip_non_empty_package_initializers() -> None: +def test_coverage_findings_skips_package_initializers_omitted_from_coverage_reports() -> None: + """``packages/**/__init__.py`` is omitted in coverage config; JSON has no per-file summary.""" source_file = Path("packages/specfact-code-review/src/specfact_code_review/tools/__init__.py") findings, coverage_by_source = _coverage_findings([source_file], {"files": {}}) - assert len(findings) == 1 - assert findings[0].category == "tool_error" - assert "Coverage data missing" in findings[0].message - assert coverage_by_source is None + assert not findings + assert coverage_by_source == {} def test_run_pytest_with_coverage_disables_global_fail_under(monkeypatch: MonkeyPatch) -> None: diff --git a/tests/unit/specfact_code_review/tools/test_semgrep_runner.py b/tests/unit/specfact_code_review/tools/test_semgrep_runner.py index 9a0ef29..93156c2 100644 --- a/tests/unit/specfact_code_review/tools/test_semgrep_runner.py +++ b/tests/unit/specfact_code_review/tools/test_semgrep_runner.py @@ -9,7 +9,7 @@ import pytest from pytest import MonkeyPatch -from specfact_code_review.tools.semgrep_runner import run_semgrep +from specfact_code_review.tools.semgrep_runner import _snip_stderr_tail, find_semgrep_config, run_semgrep from tests.unit.specfact_code_review.tools.helpers import completed_process @@ -177,6 +177,35 @@ def test_run_semgrep_ignores_unsupported_rules(tmp_path: Path, monkeypatch: Monk assert not findings +def test_find_semgrep_config_with_explicit_bundle_root(tmp_path: Path) -> None: + root = tmp_path / "bundle" + (root / ".semgrep").mkdir(parents=True) + (root / ".semgrep" / "clean_code.yaml").write_text("rules: []\n", encoding="utf-8") + assert find_semgrep_config(bundle_root=root) == root / ".semgrep" / "clean_code.yaml" + + +def test_snip_stderr_tail_keeps_last_chars() -> None: + """Long stderr should retain the suffix (most recent diagnostics), not the prefix.""" + long = "UNIQUE_HEAD_MARKER" + ("A" * 5000) + "END_OF_ERROR" + out = _snip_stderr_tail(long) + assert "END_OF_ERROR" in out + assert out.startswith("…") + assert "UNIQUE_HEAD_MARKER" not in out + + +def test_find_semgrep_config_stops_at_git_directory(tmp_path: Path) -> None: + """Do not resolve a config outside the repo (e.g. stray ~/.semgrep).""" + repo = tmp_path / "repo" + (repo / ".git").mkdir(parents=True) + nested = repo / "nested" / "pkg" / "tools" + nested.mkdir(parents=True) + (repo / "nested" / ".semgrep").mkdir(parents=True) + (repo / "nested" / ".semgrep" / "clean_code.yaml").write_text("rules: []\n", encoding="utf-8") + fake_here = nested / "runner.py" + fake_here.write_text("#", encoding="utf-8") + assert find_semgrep_config(module_file=fake_here) == repo / "nested" / ".semgrep" / "clean_code.yaml" + + def test_run_semgrep_retries_after_transient_parse_failure(tmp_path: Path, monkeypatch: MonkeyPatch) -> None: file_path = tmp_path / "target.py" payload = {