From f7a775784cd983257029a71674c0b3b40216993f Mon Sep 17 00:00:00 2001
From: CharmingGroot
Date: Fri, 12 Jun 2026 11:18:25 +0900
Subject: [PATCH] fix(supply-chain): exclude pyproject metadata keys from dependency extraction
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

SC4 reported a HIGH "Known Vulnerable Dependency" on the `requires-python`
key of pyproject.toml, matching it to the malicious PyPI package literally
named `requires-python` (MAL-2025-41747).

pyproject.toml was parsed with the requirements.txt line extractor, which
treats any `key = value` line as a package, so PEP 621 metadata keys
(`requires-python`, `name`, `version`, ...) were looked up as packages,
producing a false HIGH finding on essentially every Python project.

Add `_extract_packages_from_pyproject` (using stdlib `tomllib`) that pulls
package names only from PEP 621 `dependencies` / `optional-dependencies`
and PEP 735 `dependency-groups`, and route pyproject.toml to it.
requirements.txt, setup.py, and Pipfile keep the existing extractor.

Add tests for metadata-key exclusion, optional/group dependency
extraction, malformed TOML, and that real vulnerable deps in pyproject are
still flagged.

Signed-off-by: CharmingGroot
---
 .../analyzers/static_patterns_supply_chain.py | 77 ++++++++++++++++++-
 tests/unit/test_patterns_new.py               | 72 +++++++++++++++++
 2 files changed, 148 insertions(+), 1 deletion(-)

diff --git a/src/skillspector/nodes/analyzers/static_patterns_supply_chain.py b/src/skillspector/nodes/analyzers/static_patterns_supply_chain.py
index 3a4fcac..ecde735 100644
--- a/src/skillspector/nodes/analyzers/static_patterns_supply_chain.py
+++ b/src/skillspector/nodes/analyzers/static_patterns_supply_chain.py
@@ -28,6 +28,7 @@
 
 import re
 import sys
+import tomllib
 
 from skillspector.logging_config import get_logger
 from skillspector.models import AnalyzerFinding, Finding, Location, Severity
@@ -423,6 +424,77 @@ def _extract_packages_from_package_json(content: str) -> list[tuple[str, str | N
     return results
 
 
+# PEP 508 requirement prefix: name (letter/digit first), optional extras, one version clause.
+_PYPROJECT_SPEC_RE = re.compile(
+    r"^([a-zA-Z0-9][a-zA-Z0-9._-]*)\s*(?:\[.*?\])?\s*(?:([=<>!~]=?)\s*([\d.*]+))?"
+)
+
+
+def _find_pyproject_spec(content: str, spec: str, start: int = 0) -> int:
+    """Return the offset of *spec* as a quoted TOML string in *content*, or -1.
+
+    Searching for the quoted literal avoids matching a substring of a longer
+    entry (``httpx`` inside ``httpx-sse``). The search begins at *start*, then
+    retries from the top, then falls back to a raw substring search (needed
+    when the TOML source uses escapes and the quoted form never appears).
+    """
+    quoted = (f'"{spec}"', f"'{spec}'")
+    for begin in (start, 0):
+        hits = [i for i in (content.find(q, begin) for q in quoted) if i >= 0]
+        if hits:
+            return min(hits)
+    return content.find(spec)
+
+
+def _extract_packages_from_pyproject(content: str) -> list[tuple[str, str | None, int]]:
+    """Extract (package_name, version_or_None, line_number) from pyproject.toml.
+
+    Only PEP 621 ``[project]`` ``dependencies`` / ``optional-dependencies`` and
+    PEP 735 ``[dependency-groups]`` hold real packages. Standard metadata keys
+    (``requires-python``, ``name``, ``version``, ...) are not dependencies and
+    must not be looked up as packages.
+
+    Returns an empty list when *content* is not valid TOML. A version is only
+    reported for ``==`` / ``<=`` specifiers; any other form yields ``None``.
+    """
+    try:
+        data = tomllib.loads(content)
+    except tomllib.TOMLDecodeError:
+        return []
+
+    # Gather every array that may hold PEP 508 requirement strings.
+    candidates: list[object] = []
+    project = data.get("project")
+    if isinstance(project, dict):
+        candidates.append(project.get("dependencies"))
+        optional = project.get("optional-dependencies")
+        if isinstance(optional, dict):
+            candidates.extend(optional.values())
+    groups = data.get("dependency-groups")
+    if isinstance(groups, dict):
+        candidates.extend(groups.values())
+
+    results: list[tuple[str, str | None, int]] = []
+    cursors: dict[str, int] = {}  # next search offset per spec, so repeats map to later lines
+    for entries in candidates:
+        if not isinstance(entries, list):
+            continue
+        for spec in entries:
+            # PEP 735 groups may hold ``{include-group = ...}`` tables; skip non-strings.
+            if not isinstance(spec, str):
+                continue
+            m = _PYPROJECT_SPEC_RE.match(spec.strip())
+            if not m:
+                continue
+            version = m.group(3) if m.group(2) in ("==", "<=") else None
+            idx = _find_pyproject_spec(content, spec, cursors.get(spec, 0))
+            if idx >= 0:
+                cursors[spec] = idx + 1
+            line_num = get_line_number(content, idx) if idx >= 0 else 1
+            results.append((m.group(1), version, line_num))
+    return results
+
+
 def _version_lt(v1: str, v2: str) -> bool:
     """Simple version comparison: True if v1 < v2 (numeric tuple comparison)."""
 
@@ -695,7 +767,10 @@ def _analyze_dependencies(
         return findings
 
     if is_python_dep:
-        packages = _extract_packages_from_requirements(content)
+        if "pyproject.toml" in lower_path:
+            packages = _extract_packages_from_pyproject(content)
+        else:
+            packages = _extract_packages_from_requirements(content)
         ecosystem = ECOSYSTEM_PYPI
         fallback_db = _FALLBACK_VULNERABLE_PYPI
         popular = _POPULAR_PYPI
diff --git a/tests/unit/test_patterns_new.py b/tests/unit/test_patterns_new.py
index 329e2aa..0d77024 100644
--- a/tests/unit/test_patterns_new.py
+++ b/tests/unit/test_patterns_new.py
@@ -869,6 +869,72 @@ def test_non_dependency_file_skipped(self) -> None:
         findings = sc_mod._analyze_dependencies("requests==2.31.0\n", "README.md")
         assert len(findings) == 0
 
+    def test_pyproject_metadata_keys_not_treated_as_packages(self) -> None:
+        """PEP 621 metadata keys (requires-python, name, ...) are not dependencies."""
+        content = (
+            "[project]\n"
+            'name = "example"\n'
+            'version = "0.1.0"\n'
+            'requires-python = ">=3.12"\n'
+            'dependencies = ["httpx>=0.28"]\n'
+        )
+        names = [p[0] for p in sc_mod._extract_packages_from_pyproject(content)]
+        assert names == ["httpx"]
+
+    def test_pyproject_optional_and_group_deps_extracted(self) -> None:
+        """optional-dependencies and PEP 735 dependency-groups are real packages."""
+        content = (
+            "[project]\n"
+            'dependencies = ["httpx"]\n'
+            "[project.optional-dependencies]\n"
+            'test = ["pytest>=8"]\n'
+            "[dependency-groups]\n"
+            'dev = ["ruff"]\n'
+        )
+        names = sorted(p[0] for p in sc_mod._extract_packages_from_pyproject(content))
+        assert names == ["httpx", "pytest", "ruff"]
+
+    def test_pyproject_malformed_returns_no_packages(self) -> None:
+        """Unparseable TOML yields no packages rather than raising."""
+        assert sc_mod._extract_packages_from_pyproject("[project\nbroken =") == []
+
+    def test_pyproject_vanilla_has_no_findings(self) -> None:
+        """A normal pyproject.toml produces no SC findings (regression for issue #2)."""
+        content = (
+            '[project]\nname = "example"\nrequires-python = ">=3.12"\ndependencies = ["httpx"]\n'
+        )
+        assert _analyze_deps(content, "pyproject.toml") == []
+
+    def test_pyproject_vulnerable_dependency_still_detected(self) -> None:
+        """Real vulnerable deps in pyproject are still flagged (SC4 via static fallback)."""
+        content = '[project]\nrequires-python = ">=3.12"\ndependencies = ["pycrypto==2.6.1"]\n'
+        sc4 = [f for f in _analyze_deps(content, "pyproject.toml") if f.rule_id == "SC4"]
+        assert len(sc4) >= 1
+        assert "pycrypto" in sc4[0].message.lower() or "CVE" in sc4[0].message
+
+    def test_pyproject_no_project_table(self) -> None:
+        """A tool-only pyproject (no [project] table) yields no packages."""
+        assert sc_mod._extract_packages_from_pyproject("[tool.black]\nline-length = 88\n") == []
+
+    def test_pyproject_skips_non_pep508_and_include_group_entries(self) -> None:
+        """Non-string group entries and non-PEP 508 strings are ignored."""
+        content = (
+            "[project]\n"
+            'name = "x"\n'  # no dependencies key
+            "[project.optional-dependencies]\n"
+            'test = ["pytest"]\n'
+            "[dependency-groups]\n"
+            'dev = ["ruff", {include-group = "test"}, "_bad"]\n'
+        )
+        names = sorted(p[0] for p in sc_mod._extract_packages_from_pyproject(content))
+        assert names == ["pytest", "ruff"]
+
+    def test_pyproject_line_number_ignores_longer_entry(self) -> None:
+        """A bare spec is located by its own quoted entry, not a longer one containing it."""
+        content = '[project]\ndependencies = [\n  "httpx-sse",\n  "httpx",\n]\n'
+        lines = {p[0]: p[2] for p in sc_mod._extract_packages_from_pyproject(content)}
+        assert lines["httpx"] == lines["httpx-sse"] + 1
+
+    def test_pyproject_pep508_name_forms(self) -> None:
+        """Digit-leading names and padded specs are valid PEP 508 and are extracted."""
+        content = '[project]\ndependencies = ["3to2", " requests ==2.31.0"]\n'
+        pkgs = [p[:2] for p in sc_mod._extract_packages_from_pyproject(content)]
+        assert pkgs == [("3to2", None), ("requests", "2.31.0")]
+
 
 # ── Supply Chain Safe Patterns (SC2) ───────────────────────────────────
 