NVIDIA · wernerkasselman-au · Jun 10, 2026 · Jun 10, 2026
diff --git a/README.md b/README.md
@@ -14,7 +14,6 @@ SkillSpector helps you answer: **"Is this skill safe to install?"**
 ## Documentation
 
 - **[Development guide](docs/DEVELOPMENT.md)** — Architecture, package layout, and how to extend the analyzer pipeline.
-- **[OSS_RELEASE.md](OSS_RELEASE.md)** — How to produce a public-OSS branch from this repo.
 
 ## Features
 
@@ -272,10 +271,7 @@ SkillSpector detects **64 vulnerability patterns** across 16 categories:
 | TP3 | Parameter Description Injection | MEDIUM | Injection patterns in parameter definitions (overrides, system tokens, malicious defaults) |
 | TP4 | Description-Behavior Mismatch | MEDIUM | Declared tool description does not match actual code behavior (LLM-powered) |
 
-View all patterns:
-```bash
-skillspector patterns
-```
+All detected patterns are listed in the tables above.
 
 ## Risk Scoring
 
@@ -301,7 +297,7 @@ skillspector patterns
 ### Terminal Output
 
 ```
- SkillSpector Security Report  v0.1.0
+ SkillSpector Security Report  v2.0.0
 
 Skill: suspicious-skill
 Source: ./suspicious-skill/

diff --git a/src/skillspector/llm_analyzer_base.py b/src/skillspector/llm_analyzer_base.py
@@ -28,11 +28,13 @@
 from __future__ import annotations
 
 import asyncio
+import os
+import time
 from collections import defaultdict
 from dataclasses import dataclass, field
 from typing import Literal
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, field_validator
 
 from skillspector.llm_utils import get_chat_model
 from skillspector.logging_config import get_logger
@@ -45,6 +47,83 @@
 CHARS_PER_TOKEN = 4
 CHUNK_OVERLAP_LINES = 50
 
+# Concurrency / rate-limit handling for hosted providers (e.g. NVIDIA build
+# tier) that 429 under bursty parallelism. Override via env without code edits.
+DEFAULT_MAX_CONCURRENCY = int(os.environ.get("SKILLSPECTOR_MAX_CONCURRENCY", "2"))
+_RATE_LIMIT_MAX_RETRIES = int(os.environ.get("SKILLSPECTOR_RATELIMIT_RETRIES", "5"))
+_RATE_LIMIT_BASE_DELAY = float(os.environ.get("SKILLSPECTOR_RATELIMIT_BASE_DELAY", "2.0"))
+
+
+def _is_rate_limit_error(exc: BaseException) -> bool:
+    """Heuristically detect a 429 / rate-limit error across SDKs."""
+    if getattr(exc, "status_code", None) == 429 or getattr(exc, "status", None) == 429:
+        return True
+    name = type(exc).__name__
+    text = str(exc)
+    return "RateLimit" in name or "429" in text or "Too Many Requests" in text
+
+
+def _is_timeout_error(exc: BaseException) -> bool:
+    """Detect a transient request-timeout error across SDKs."""
+    name = type(exc).__name__
+    text = str(exc).lower()
+    return "Timeout" in name or "timed out" in text or "timeout" in text
+
+
+def _is_retryable_error(exc: BaseException) -> bool:
+    """Transient errors worth retrying: rate limits and timeouts.
+
+    Deliberately excludes 4xx like context-overflow (400) — retrying those is
+    futile; the caller skips that batch instead.
+    """
+    return _is_rate_limit_error(exc) or _is_timeout_error(exc)
+
+
+async def _ainvoke_with_retry(coro_factory, label: str):
+    """Await ``coro_factory()``; on rate-limit errors retry with exponential backoff.
+
+    ``coro_factory`` must return a *fresh* awaitable each call (coroutines are
+    single-use). Non-rate-limit errors propagate immediately.
+    """
+    for attempt in range(_RATE_LIMIT_MAX_RETRIES + 1):
+        try:
+            return await coro_factory()
+        except Exception as exc:  # noqa: BLE001 - re-raised below unless rate limit
+            if not _is_retryable_error(exc) or attempt == _RATE_LIMIT_MAX_RETRIES:
+                raise
+            delay = _RATE_LIMIT_BASE_DELAY * (2**attempt)
+            logger.warning(
+                "Rate limited on %s (attempt %d/%d); backing off %.1fs",
+                label,
+                attempt + 1,
+                _RATE_LIMIT_MAX_RETRIES,
+                delay,
+            )
+            await asyncio.sleep(delay)
+
+
+def _invoke_with_retry(call_factory, label: str):
+    """Synchronous twin of :func:`_ainvoke_with_retry`.
+
+    ``call_factory`` must perform a *fresh* invocation each call and return its
+    result. Non-rate-limit errors propagate immediately.
+    """
+    for attempt in range(_RATE_LIMIT_MAX_RETRIES + 1):
+        try:
+            return call_factory()
+        except Exception as exc:  # noqa: BLE001 - re-raised below unless rate limit
+            if not _is_retryable_error(exc) or attempt == _RATE_LIMIT_MAX_RETRIES:
+                raise
+            delay = _RATE_LIMIT_BASE_DELAY * (2**attempt)
+            logger.warning(
+                "Rate limited on %s (attempt %d/%d); backing off %.1fs",
+                label,
+                attempt + 1,
+                _RATE_LIMIT_MAX_RETRIES,
+                delay,
+            )
+            time.sleep(delay)
+
 
 # ---------------------------------------------------------------------------
 # Default structured-output schemas (discovery mode)
@@ -61,12 +140,25 @@ class LLMFinding(BaseModel):
     rule_id: str = Field(description="Identifier for the type of finding")
     message: str = Field(description="Short description of the finding")
     severity: Literal["LOW", "MEDIUM", "HIGH", "CRITICAL"] = Field(description="Severity level")
-    start_line: int = Field(ge=1, description="Starting line number")
+    # NOTE: numeric range constraints (ge/le) are intentionally expressed in the
+    # description rather than as Field bounds. Pydantic bounds emit JSON-schema
+    # minimum/maximum, which some structured-output endpoints (e.g. Anthropic's
+    # OpenAI-compatible tool schema) reject. Consuming code clamps/guards values.
+    start_line: int = Field(description="Starting line number (>= 1)")
     end_line: int | None = Field(default=None, description="Ending line number (optional)")
-    confidence: float = Field(ge=0.0, le=1.0, default=0.5, description="Confidence score")
+    confidence: float = Field(default=0.5, description="Confidence score between 0.0 and 1.0")
     explanation: str = Field(default="", description="Why this is a finding (2-3 sentences)")
     remediation: str = Field(default="", description="Actionable steps to fix the issue")
 
+    @field_validator("confidence")
+    @classmethod
+    def _check_confidence(cls, v: float) -> float:
+        # Runtime bound only — kept out of the JSON schema (see note above) so
+        # structured-output endpoints that reject minimum/maximum still work.
+        if not 0.0 <= v <= 1.0:
+            raise ValueError("confidence must be between 0.0 and 1.0")
+        return v
+
     def to_finding(self, file: str) -> Finding:
         """Convert to a :class:`Finding` for the graph state."""
         return Finding(
@@ -352,20 +444,30 @@ def run_batches(
                 estimate_tokens(prompt),
                 len(batch.findings),
             )
-            if self._structured_llm:
-                response = self._structured_llm.invoke(prompt)
-            else:
-                response = self._llm.invoke(prompt).content
-            logger.debug("LLM response for %s", batch.file_label)
-            parsed = self.parse_response(response, batch)
+            try:
+                if self._structured_llm:
+                    response = _invoke_with_retry(
+                        lambda p=prompt: self._structured_llm.invoke(p), batch.file_label
+                    )
+                else:
+                    response = _invoke_with_retry(
+                        lambda p=prompt: self._llm.invoke(p), batch.file_label
+                    ).content
+                logger.debug("LLM response for %s", batch.file_label)
+                parsed = self.parse_response(response, batch)
+            except NotImplementedError:
+                raise  # structural/subclass bug — never swallow
+            except Exception as exc:  # noqa: BLE001 - isolate one batch's failure
+                logger.warning("Skipping batch %s after error: %s", batch.file_label, exc)
+                continue
             results.append((batch, parsed))
         return results
 
     async def arun_batches(
         self,
         batches: list[Batch],
         *,
-        max_concurrency: int = 10,
+        max_concurrency: int = DEFAULT_MAX_CONCURRENCY,
         **kwargs: object,
     ) -> list[tuple[Batch, list]]:
         """Execute LLM calls for all *batches* concurrently.
@@ -378,7 +480,7 @@ async def arun_batches(
         """
         sem = asyncio.Semaphore(max_concurrency)
 
-        async def _process(batch: Batch) -> tuple[Batch, list]:
+        async def _process(batch: Batch) -> tuple[Batch, list] | None:
             async with sem:
                 prompt = self.build_prompt(batch, **kwargs)
                 logger.debug(
@@ -387,14 +489,26 @@ async def _process(batch: Batch) -> tuple[Batch, list]:
                     estimate_tokens(prompt),
                     len(batch.findings),
                 )
-                if self._structured_llm:
-                    response = await self._structured_llm.ainvoke(prompt)
-                else:
-                    response = (await self._llm.ainvoke(prompt)).content
-                logger.debug("LLM response for %s", batch.file_label)
-                return (batch, self.parse_response(response, batch))
-
-        return list(await asyncio.gather(*[_process(b) for b in batches]))
+                try:
+                    if self._structured_llm:
+                        response = await _ainvoke_with_retry(
+                            lambda: self._structured_llm.ainvoke(prompt), batch.file_label
+                        )
+                    else:
+                        invoked = await _ainvoke_with_retry(
+                            lambda: self._llm.ainvoke(prompt), batch.file_label
+                        )
+                        response = invoked.content
+                    logger.debug("LLM response for %s", batch.file_label)
+                    return (batch, self.parse_response(response, batch))
+                except NotImplementedError:
+                    raise  # structural/subclass bug — never swallow
+                except Exception as exc:  # noqa: BLE001 - isolate one batch's failure
+                    logger.warning("Skipping batch %s after error: %s", batch.file_label, exc)
+                    return None
+
+        gathered = await asyncio.gather(*[_process(b) for b in batches])
+        return [r for r in gathered if r is not None]
 
     # -- Convenience --------------------------------------------------------
 

diff --git a/src/skillspector/nodes/analyzers/behavioral_ast.py b/src/skillspector/nodes/analyzers/behavioral_ast.py
@@ -24,7 +24,7 @@
 from skillspector.state import AnalyzerNodeResponse, SkillspectorState
 
 from .common import get_context_from_lines, get_source_segment, resolve_call_name
-from .static_runner import MAX_FILE_BYTES, analyzer_finding_to_finding
+from .static_runner import analyzer_finding_to_finding, raise_if_content_exceeds_limit
 
 ANALYZER_ID = "behavioral_ast"
 logger = get_logger(__name__)
@@ -216,8 +216,9 @@ def node(state: SkillspectorState) -> AnalyzerNodeResponse:
         if not path.endswith(".py"):
             continue
         content = file_cache.get(path)
-        if content is None or len(content) > MAX_FILE_BYTES:
+        if content is None:
             continue
+        raise_if_content_exceeds_limit(path, content)
         raw = _analyze_python(content, path)
         all_findings.extend(analyzer_finding_to_finding(af) for af in raw)
 

diff --git a/src/skillspector/nodes/analyzers/behavioral_taint_tracking.py b/src/skillspector/nodes/analyzers/behavioral_taint_tracking.py
@@ -36,7 +36,7 @@
     resolve_call_name_typed,
     resolve_dotted_name,
 )
-from .static_runner import MAX_FILE_BYTES, analyzer_finding_to_finding
+from .static_runner import analyzer_finding_to_finding, raise_if_content_exceeds_limit
 
 ANALYZER_ID = "behavioral_taint_tracking"
 logger = get_logger(__name__)
@@ -400,8 +400,9 @@ def node(state: SkillspectorState) -> AnalyzerNodeResponse:
         if not path.endswith(".py"):
             continue
         content = file_cache.get(path)
-        if content is None or len(content) > MAX_FILE_BYTES:
+        if content is None:
             continue
+        raise_if_content_exceeds_limit(path, content)
         raw = _analyze_python(content, path)
         all_findings.extend(analyzer_finding_to_finding(af) for af in raw)
 

diff --git a/src/skillspector/nodes/analyzers/mcp_tool_poisoning.py b/src/skillspector/nodes/analyzers/mcp_tool_poisoning.py
@@ -835,8 +835,11 @@ def node(state: SkillspectorState) -> AnalyzerNodeResponse:
     if isinstance(params, list):
         findings.extend(_check_tp3(params))
 
-    # TP4: LLM-based check (only when use_llm is enabled)
-    if state.get("use_llm", False):
+    # TP4: LLM-based check (only when use_llm is enabled). Defaults to True to
+    # match every other LLM-using node (semantic_*, meta_analyzer); the CLI
+    # always sets this explicitly, so the default only affects programmatic
+    # callers that omit the key.
+    if state.get("use_llm", True):
         findings.extend(_check_tp4(state))
 
     logger.info("%s: %d findings", ANALYZER_ID, len(findings))

diff --git a/src/skillspector/nodes/analyzers/static_runner.py b/src/skillspector/nodes/analyzers/static_runner.py
@@ -46,7 +46,17 @@
     ".rs": "rust",
 }
 
-MAX_FILE_BYTES = 1_000_000
+MAX_FILE_BYTES = 50 * 1024 * 1024
+
+
+def raise_if_content_exceeds_limit(path: str, content: str) -> None:
+    """Fail closed if analyzer input exceeds the per-file analysis limit."""
+    size_bytes = len(content.encode("utf-8"))
+    if size_bytes > MAX_FILE_BYTES:
+        raise ValueError(
+            "Scan aborted: file size exceeds the per-file analysis limit "
+            f"({MAX_FILE_BYTES} bytes): {path} ({size_bytes} bytes)"
+        )
 
 
 def _infer_file_type(path: str) -> str:
@@ -107,14 +117,7 @@ def run_static_patterns(
         if content is None:
             logger.debug("Skipping %s: no content in file_cache", path)
             continue
-        if len(content) > MAX_FILE_BYTES:
-            logger.debug(
-                "Skipping %s: size %d exceeds MAX_FILE_BYTES (%d)",
-                path,
-                len(content),
-                MAX_FILE_BYTES,
-            )
-            continue
+        raise_if_content_exceeds_limit(path, content)
         file_type = _infer_file_type(path)
         for module in pattern_modules:
             raw = module.analyze(content=content, file_path=path, file_type=file_type)

diff --git a/src/skillspector/nodes/analyzers/static_yara.py b/src/skillspector/nodes/analyzers/static_yara.py
@@ -33,7 +33,7 @@
 
 from .common import get_context, get_line_number
 from .pattern_defaults import PatternCategory
-from .static_runner import MAX_FILE_BYTES, analyzer_finding_to_finding
+from .static_runner import analyzer_finding_to_finding, raise_if_content_exceeds_limit
 
 ANALYZER_ID = "static_yara"
 logger = get_logger(__name__)
@@ -243,9 +243,7 @@ def node(state: SkillspectorState) -> AnalyzerNodeResponse:
         content = file_cache.get(path)
         if content is None:
             continue
-        if len(content) > MAX_FILE_BYTES:
-            logger.debug("%s: skipping %s (exceeds size limit)", ANALYZER_ID, path)
-            continue
+        raise_if_content_exceeds_limit(path, content)
         for af in _match_file(rules, content, path):
             findings.append(analyzer_finding_to_finding(af))