Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions packages/specfact-code-review/module-package.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: nold-ai/specfact-code-review
version: 0.46.1
version: 0.46.4
commands:
- code
tier: official
Expand All @@ -23,5 +23,5 @@ description: Official SpecFact code review bundle package.
category: codebase
bundle_group_command: code
integrity:
checksum: sha256:13a5a27fc13e2db6d36573b42482a1fe9d294e100738f6150ad0468868581dd1
signature: ulRchbUE1q1JhCuTRUPUolICZMiWDBCQgNmNt4YNbtyRysXMhiIcge/KJ1X1VqKtzWu4lzvEDf0OInPJ2lRxBA==
checksum: sha256:ee7d224ac2a7894cc67d3e052764137b034e089624d5007989cab818839e5449
signature: V+GNklTfgmdYKDWgp53SDw4s1R5GE1UF/745CVnXFJ0v3WSCWLY8APqyuabetBU6/Z1UQ1lKfEiRiZOFJw7WBg==
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,23 @@ def _is_empty_init_file(source_file: Path) -> bool:
return stripped_content in ("", "pass")


def _is_coverage_omitted_init_by_project_policy(source_file: Path) -> bool:
"""True when repo coverage omits this file (``pyproject.toml`` ``[tool.coverage.run]`` ``omit``).

``src/**/__init__.py`` and ``packages/**/__init__.py`` are omitted from coverage; the pytest-cov
JSON report therefore has no ``percent_covered`` for them — not a TDD gap.
"""
try:
path = source_file if source_file.is_absolute() else (Path.cwd() / source_file).resolve()
rel = path.relative_to(Path.cwd().resolve())
except (ValueError, OSError):
rel = source_file
if rel.name != "__init__.py":
return False
parts = rel.parts
return len(parts) >= 2 and parts[0] in ("src", "packages")


def _coverage_findings(
source_files: list[Path],
coverage_payload: dict[str, object],
Expand All @@ -312,6 +329,8 @@ def _coverage_findings(
if percent_covered is None:
if source_file.name == "__init__.py" and _is_empty_init_file(source_file):
continue # Exempt empty __init__.py files
if _is_coverage_omitted_init_by_project_policy(source_file):
continue
return [
tool_error(
tool="pytest",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@
from specfact_code_review.tools.pylint_runner import run_pylint
from specfact_code_review.tools.radon_runner import run_radon
from specfact_code_review.tools.ruff_runner import run_ruff
from specfact_code_review.tools.semgrep_runner import run_semgrep
from specfact_code_review.tools.semgrep_runner import find_semgrep_config, run_semgrep


__all__ = [
"find_semgrep_config",
"run_ast_clean_code",
"run_basedpyright",
"run_contract_check",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
}
SEMGREP_TIMEOUT_SECONDS = 90
SEMGREP_RETRY_ATTEMPTS = 2
_SEMGREP_STDERR_SNIP_MAX = 4000
_MAX_CONFIG_PARENT_WALK = 32
SemgrepCategory = Literal["clean_code", "architecture", "naming"]


Expand Down Expand Up @@ -73,15 +75,53 @@ def _normalize_rule_id(rule: str) -> str:
return rule


def _resolve_config_path() -> Path:
package_root = Path(__file__).resolve().parents[3]
config_path = package_root / ".semgrep" / "clean_code.yaml"
if config_path.exists():
return config_path
raise FileNotFoundError(f"Semgrep config not found at {config_path}")
def _is_bundle_boundary(directory: Path) -> bool:
"""True when ``directory`` is a sensible workspace root (do not search parents above it)."""
return (directory / ".git").exists() or (directory / "module-package.yaml").is_file()


def _run_semgrep_command(files: list[Path]) -> subprocess.CompletedProcess[str]:
@beartype
@require(lambda bundle_root, module_file: bundle_root is None or isinstance(bundle_root, Path))
@require(lambda bundle_root, module_file: module_file is None or isinstance(module_file, Path))
@ensure(lambda result: isinstance(result, Path), "result must be a Path")
def find_semgrep_config(
*,
bundle_root: Path | None = None,
module_file: Path | None = None,
) -> Path:
"""Locate ``.semgrep/clean_code.yaml`` for this package or an explicit bundle root.

When ``bundle_root`` is set, only ``bundle_root/.semgrep/clean_code.yaml`` is considered.

Otherwise walk parents of ``module_file`` (default: this module), checking each directory for
``.semgrep/clean_code.yaml``. Stop ascending when a bundle boundary is hit (``.git`` or
``module-package.yaml``) so unrelated trees (e.g. a stray ``~/.semgrep``) are never used.
"""
if bundle_root is not None:
br = bundle_root.resolve()
candidate = br / ".semgrep" / "clean_code.yaml"
if candidate.is_file():
return candidate
raise FileNotFoundError(f"Semgrep config not found at {candidate} (bundle_root={br})")

here = (module_file if module_file is not None else Path(__file__)).resolve()
for depth, parent in enumerate([here.parent, *here.parents]):
if depth > _MAX_CONFIG_PARENT_WALK:
break
if parent == parent.parent:
break
candidate = parent / ".semgrep" / "clean_code.yaml"
if candidate.is_file():
return candidate
if _is_bundle_boundary(parent):
break
# Installed wheels live under site-packages; do not walk into lib/, $HOME, or other trees.
if parent.name == "site-packages":
break
raise FileNotFoundError(f"Semgrep config not found (no .semgrep/clean_code.yaml under bundle for {here})")


def _run_semgrep_command(files: list[Path], *, bundle_root: Path | None) -> subprocess.CompletedProcess[str]:
with tempfile.TemporaryDirectory(prefix="semgrep-home-") as temp_home:
semgrep_home = Path(temp_home)
semgrep_log_dir = semgrep_home / ".semgrep"
Expand All @@ -96,8 +136,9 @@ def _run_semgrep_command(files: list[Path]) -> subprocess.CompletedProcess[str]:
[
"semgrep",
"--disable-version-check",
"--quiet",
"--config",
str(_resolve_config_path()),
str(find_semgrep_config(bundle_root=bundle_root)),
"--json",
*(str(file_path) for file_path in files),
],
Expand All @@ -109,12 +150,24 @@ def _run_semgrep_command(files: list[Path]) -> subprocess.CompletedProcess[str]:
)


def _load_semgrep_results(files: list[Path]) -> list[object]:
def _snip_stderr_tail(stderr: str) -> str:
"""Keep the last ``_SEMGREP_STDERR_SNIP_MAX`` chars so the most recent diagnostics stay visible."""
err_raw = stderr.strip()
if len(err_raw) <= _SEMGREP_STDERR_SNIP_MAX:
return err_raw
return "…" + err_raw[-_SEMGREP_STDERR_SNIP_MAX:]


def _load_semgrep_results(files: list[Path], *, bundle_root: Path | None) -> list[object]:
last_error: Exception | None = None
for _attempt in range(SEMGREP_RETRY_ATTEMPTS):
try:
result = _run_semgrep_command(files)
return _parse_semgrep_results(json.loads(result.stdout))
result = _run_semgrep_command(files, bundle_root=bundle_root)
raw_out = result.stdout.strip()
if not raw_out:
err_tail = _snip_stderr_tail(result.stderr or "")
raise ValueError(f"semgrep returned empty stdout (returncode={result.returncode}); stderr={err_tail!r}")
return _parse_semgrep_results(json.loads(raw_out))
except (FileNotFoundError, OSError, ValueError, json.JSONDecodeError, subprocess.TimeoutExpired) as exc:
last_error = exc
if last_error is None:
Expand Down Expand Up @@ -189,13 +242,20 @@ def _finding_from_result(item: dict[str, object], *, allowed_paths: set[str]) ->
lambda result: all(isinstance(finding, ReviewFinding) for finding in result),
"result must contain ReviewFinding instances",
)
def run_semgrep(files: list[Path]) -> list[ReviewFinding]:
"""Run Semgrep for the provided files and map findings into ReviewFinding records."""
def run_semgrep(files: list[Path], *, bundle_root: Path | None = None) -> list[ReviewFinding]:
"""Run Semgrep for the provided files and map findings into ReviewFinding records.

Args:
files: Paths to scan.
bundle_root: Optional directory that contains ``.semgrep/clean_code.yaml`` (e.g. extracted
bundle root). When omitted, config is resolved from this package upward until a bundle
boundary (``.git`` or ``module-package.yaml``).
"""
if not files:
return []

try:
raw_results = _load_semgrep_results(files)
raw_results = _load_semgrep_results(files, bundle_root=bundle_root)
except (FileNotFoundError, OSError, ValueError, json.JSONDecodeError, subprocess.TimeoutExpired) as exc:
return _tool_error(files[0], f"Unable to parse Semgrep output: {exc}")

Expand Down
6 changes: 3 additions & 3 deletions registry/index.json
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,9 @@
},
{
"id": "nold-ai/specfact-code-review",
"latest_version": "0.46.1",
"download_url": "modules/specfact-code-review-0.46.1.tar.gz",
"checksum_sha256": "cbd954a055dfa83bab76880350f3bdc5652046ed4527d0c0ff0d1550e51be1af",
"latest_version": "0.46.4",
"download_url": "modules/specfact-code-review-0.46.4.tar.gz",
"checksum_sha256": "caecd26d6e6308ed88047385e0a9579c5336665d46e8118c3ae9caf4cbd786c8",
"core_compatibility": ">=0.44.0,<1.0.0",
"tier": "official",
"publisher": {
Expand Down
Binary file not shown.
1 change: 1 addition & 0 deletions registry/modules/specfact-code-review-0.46.3.tar.gz.sha256
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
58f3f9b01a13247b8109f856cb96abfd955e21a70ffafa37ceebf587d402888e
Binary file not shown.
1 change: 1 addition & 0 deletions registry/modules/specfact-code-review-0.46.4.tar.gz.sha256
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
caecd26d6e6308ed88047385e0a9579c5336665d46e8118c3ae9caf4cbd786c8
1 change: 1 addition & 0 deletions registry/signatures/specfact-code-review-0.46.3.tar.sig
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
qAX9yIBO+UBsqa4ODAym4YrtWlGhSoyiIb4b/UUWKKN+vv8yNentvwR8mHd32I7BnF+TBiHHmonH+ktPV9XSCQ==
1 change: 1 addition & 0 deletions registry/signatures/specfact-code-review-0.46.4.tar.sig
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
V+GNklTfgmdYKDWgp53SDw4s1R5GE1UF/745CVnXFJ0v3WSCWLY8APqyuabetBU6/Z1UQ1lKfEiRiZOFJw7WBg==
9 changes: 4 additions & 5 deletions tests/unit/specfact_code_review/run/test_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,15 +471,14 @@ def test_coverage_findings_skips_package_initializers_without_coverage_data() ->
assert coverage_by_source == {}


def test_coverage_findings_does_not_skip_non_empty_package_initializers() -> None:
def test_coverage_findings_skips_package_initializers_omitted_from_coverage_reports() -> None:
"""``packages/**/__init__.py`` is omitted in coverage config; JSON has no per-file summary."""
source_file = Path("packages/specfact-code-review/src/specfact_code_review/tools/__init__.py")

findings, coverage_by_source = _coverage_findings([source_file], {"files": {}})

assert len(findings) == 1
assert findings[0].category == "tool_error"
assert "Coverage data missing" in findings[0].message
assert coverage_by_source is None
assert not findings
assert coverage_by_source == {}


def test_run_pytest_with_coverage_disables_global_fail_under(monkeypatch: MonkeyPatch) -> None:
Expand Down
31 changes: 30 additions & 1 deletion tests/unit/specfact_code_review/tools/test_semgrep_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import pytest
from pytest import MonkeyPatch

from specfact_code_review.tools.semgrep_runner import run_semgrep
from specfact_code_review.tools.semgrep_runner import _snip_stderr_tail, find_semgrep_config, run_semgrep
from tests.unit.specfact_code_review.tools.helpers import completed_process


Expand Down Expand Up @@ -177,6 +177,35 @@ def test_run_semgrep_ignores_unsupported_rules(tmp_path: Path, monkeypatch: Monk
assert not findings


def test_find_semgrep_config_with_explicit_bundle_root(tmp_path: Path) -> None:
root = tmp_path / "bundle"
(root / ".semgrep").mkdir(parents=True)
(root / ".semgrep" / "clean_code.yaml").write_text("rules: []\n", encoding="utf-8")
assert find_semgrep_config(bundle_root=root) == root / ".semgrep" / "clean_code.yaml"


def test_snip_stderr_tail_keeps_last_chars() -> None:
"""Long stderr should retain the suffix (most recent diagnostics), not the prefix."""
long = "UNIQUE_HEAD_MARKER" + ("A" * 5000) + "END_OF_ERROR"
out = _snip_stderr_tail(long)
assert "END_OF_ERROR" in out
assert out.startswith("…")
assert "UNIQUE_HEAD_MARKER" not in out


def test_find_semgrep_config_stops_at_git_directory(tmp_path: Path) -> None:
"""Do not resolve a config outside the repo (e.g. stray ~/.semgrep)."""
repo = tmp_path / "repo"
(repo / ".git").mkdir(parents=True)
nested = repo / "nested" / "pkg" / "tools"
nested.mkdir(parents=True)
(repo / "nested" / ".semgrep").mkdir(parents=True)
(repo / "nested" / ".semgrep" / "clean_code.yaml").write_text("rules: []\n", encoding="utf-8")
fake_here = nested / "runner.py"
fake_here.write_text("#", encoding="utf-8")
assert find_semgrep_config(module_file=fake_here) == repo / "nested" / ".semgrep" / "clean_code.yaml"


def test_run_semgrep_retries_after_transient_parse_failure(tmp_path: Path, monkeypatch: MonkeyPatch) -> None:
file_path = tmp_path / "target.py"
payload = {
Expand Down
Loading