NVIDIA · korjavin · Jun 11, 2026 · Jun 11, 2026 · Jun 11, 2026 · Jun 11, 2026
diff --git a/README.md b/README.md
@@ -18,7 +18,7 @@ SkillSpector helps you answer: **"Is this skill safe to install?"**
 ## Features
 
 - **Multi-format input**: Scan Git repos, URLs, zip files, directories, or single files
-- **64 vulnerability patterns** across 16 categories: prompt injection, data exfiltration, privilege escalation, supply chain, excessive agency, output handling, system prompt leakage, memory poisoning, tool misuse, rogue agent, trigger abuse, dangerous code (AST), taint tracking, YARA signatures, MCP least privilege, and MCP tool poisoning
+- **65 vulnerability patterns** across 16 categories: prompt injection, data exfiltration, privilege escalation, supply chain, excessive agency, output handling, system prompt leakage, memory poisoning, tool misuse, rogue agent, trigger abuse, dangerous code (AST), taint tracking, YARA signatures, MCP least privilege, and MCP tool poisoning
 - **Two-stage analysis**: Fast static analysis + optional LLM semantic evaluation
 - **Live vulnerability lookups**: SC4 queries [OSV.dev](https://osv.dev) for real-time CVE data with automatic offline fallback
 - **Multiple output formats**: Terminal, JSON, Markdown, and SARIF reports
@@ -125,9 +125,9 @@ skillspector scan ./my-skill/ --no-llm
 
 ## Vulnerability Patterns
 
-SkillSpector detects **64 vulnerability patterns** across 16 categories:
+SkillSpector detects **65 vulnerability patterns** across 16 categories:
 
-### Prompt Injection (5 patterns)
+### Prompt Injection (6 patterns)
 
 | ID | Pattern | Severity | Description |
 |----|---------|----------|-------------|
@@ -136,6 +136,7 @@ SkillSpector detects **64 vulnerability patterns** across 16 categories:
 | P3 | Exfiltration Commands | HIGH | Instructions to transmit context externally |
 | P4 | Behavior Manipulation | MEDIUM | Subtle instructions altering agent decisions |
 | P5 | Harmful Content | CRITICAL | Instructions that could cause physical harm |
+| P9 | Whitespace Padding | MEDIUM | Large whitespace padding hiding instructions below/beside the visible area |
 
 ### Data Exfiltration (4 patterns)
 

diff --git a/src/skillspector/nodes/analyzers/mcp_tool_poisoning.py b/src/skillspector/nodes/analyzers/mcp_tool_poisoning.py
@@ -25,6 +25,10 @@
 
 from skillspector.llm_utils import chat_completion
 from skillspector.models import Finding
+from skillspector.nodes.analyzers.whitespace_padding import (
+    ZERO_WIDTH_CHARS,
+    detect_whitespace_padding,
+)
 from skillspector.state import AnalyzerNodeResponse, SkillspectorState
 
 ANALYZER_ID = "mcp_tool_poisoning"
@@ -130,8 +134,14 @@ def _extract_metadata_texts(manifest: dict) -> list[tuple[str, str, bool]]:
 # Markdown comment: [//]: # (...)
 _MARKDOWN_COMMENT_RE = re.compile(r"\[//\]:\s*#\s*\(.*?\)")
 
-# Zero-width chars followed by visible text
-_ZERO_WIDTH_RE = re.compile(r"[\u200b\u200c\u200d]+\S")
+# Zero-width chars followed by visible text.
+#
+# The character class is derived from the shared ``ZERO_WIDTH_CHARS`` constant in
+# ``whitespace_padding`` so TP1's hidden-text check and P2/P9 cannot drift apart
+# (single shared definition). Converging on the shared set also adds U+2060 (WORD
+# JOINER) and U+FEFF (ZERO WIDTH NO-BREAK SPACE / BOM) coverage to this check — a
+# strict improvement over the previous U+200B/U+200C/U+200D-only class.
+_ZERO_WIDTH_RE = re.compile("[" + "".join(sorted(ZERO_WIDTH_CHARS)) + "]+\\S")
 
 # Base64 blobs (>=50 chars) — checked AFTER data URI to avoid double-counting
 _BASE64_RE = re.compile(r"[A-Za-z0-9+/]{50,}={0,2}")
@@ -296,6 +306,63 @@ def _check_tp1(text: str, source_field: str) -> list[Finding]:
     return findings
 
 
+# ---------------------------------------------------------------------------
+# P9: Whitespace padding (shared detector)
+# ---------------------------------------------------------------------------
+
+
+def _check_p9_padding(text: str, source_field: str) -> list[Finding]:
+    """Emit one P9 finding per reportable whitespace-padding run in *text*.
+
+    Runs come from the shared ``detect_whitespace_padding`` scanner and are
+    graded by kind: "horizontal" and "vertical" yield MEDIUM / 0.7; "block" (a
+    multibyte span over the byte budget but under the line/char primaries)
+    yields LOW / 0.4; "ratio" is dropped because manifest fields are too short
+    for its 4 KB floor. Vertical runs are kept since padding built from Unicode
+    line separators (U+2028 / U+2029 / U+0085) is classified vertical yet still
+    hides text inside a single field. *source_field* is quoted in the message.
+    """
+    # kind -> (severity, confidence); kinds absent from the table are skipped.
+    grading = {
+        "horizontal": ("MEDIUM", 0.7),
+        "vertical": ("MEDIUM", 0.7),
+        "block": ("LOW", 0.4),
+    }
+    message = (
+        f"Whitespace padding found in '{source_field}': "
+        "large whitespace run may hide instructions from reviewers."
+    )
+    explanation = (
+        "Large runs of whitespace padding in metadata fields can push injected "
+        "instructions out of a human reviewer's view while the AI agent still "
+        "processes the full text."
+    )
+    remediation = (
+        "Remove oversized whitespace runs from metadata fields. "
+        "Descriptions should contain normal, visible text only."
+    )
+    findings: list[Finding] = []
+    for run in detect_whitespace_padding(text):
+        if run.kind not in grading:
+            continue
+        severity, confidence = grading[run.kind]
+        findings.append(
+            Finding(
+                rule_id="P9",
+                message=message,
+                severity=severity,
+                confidence=confidence,
+                file="SKILL.md",
+                category=_CATEGORY,
+                tags=list(_FRAMEWORK_TAGS),
+                matched_text=run.summary,
+                explanation=explanation,
+                remediation=remediation,
+            )
+        )
+    return findings
+
+
 # ---------------------------------------------------------------------------
 # TP2: Unicode deception
 # ---------------------------------------------------------------------------
@@ -825,6 +892,11 @@ def node(state: SkillspectorState) -> AnalyzerNodeResponse:
     for text, source_field, is_identifier in metadata_texts:
         findings.extend(_check_tp2(text, source_field, is_identifier))
 
+    # P9: Whitespace padding — check non-identifier (free-text) fields only
+    for text, source_field, is_identifier in metadata_texts:
+        if not is_identifier:
+            findings.extend(_check_p9_padding(text, source_field))
+
     # TP3: Parameter description injection — check parameters
     params = manifest.get("parameters") or []
     if isinstance(params, list):

diff --git a/src/skillspector/nodes/analyzers/pattern_defaults.py b/src/skillspector/nodes/analyzers/pattern_defaults.py
@@ -47,6 +47,7 @@ class PatternCategory(StrEnum):
     "P3": "Instructions found that direct the agent to transmit conversation context or user data to external services.",
     "P4": "Subtle instructions detected that may alter agent decision-making or introduce hidden biases.",
     "P5": "This content may contain harmful instructions that could cause physical harm if followed. CRITICAL: Review carefully before use.",
+    "P9": "Large whitespace padding was detected (a block of blank lines or a long run of spaces). This can push injected instructions below or to the right of the visible area so a human reviewer never sees them while the agent still reads them. Manual review of the hidden content is recommended.",
     "E1": "Data is being sent to an external URL. This could be legitimate telemetry or data exfiltration. Manual review is recommended.",
     "E2": "Code accesses environment variables that may contain secrets (API keys, tokens). This is a common pattern for credential theft.",
     "E3": "Code scans file system directories looking for sensitive files. This could be reconnaissance for credential theft.",
@@ -128,6 +129,7 @@ class PatternCategory(StrEnum):
     "P3": PatternCategory.PROMPT_INJECTION.value,
     "P4": PatternCategory.PROMPT_INJECTION.value,
     "P5": PatternCategory.PROMPT_INJECTION.value,
+    "P9": PatternCategory.PROMPT_INJECTION.value,
     "P6": PatternCategory.SYSTEM_PROMPT_LEAKAGE.value,
     "P7": PatternCategory.SYSTEM_PROMPT_LEAKAGE.value,
     "P8": PatternCategory.SYSTEM_PROMPT_LEAKAGE.value,
@@ -191,6 +193,7 @@ class PatternCategory(StrEnum):
     "P3": "External Transmission Instructions",
     "P4": "Subtle Steering",
     "P5": "Harmful Content",
+    "P9": "Whitespace Padding",
     "P6": "System Prompt Leakage",
     "P7": "System Prompt Leakage",
     "P8": "System Prompt Leakage",
@@ -254,6 +257,7 @@ class PatternCategory(StrEnum):
     "P3": "Remove instructions that send user data, prompts, or context to external URLs. If telemetry is needed, use documented, privacy-preserving methods.",
     "P4": "Review content for implicit steering or bias. Ensure instructions are explicit and align with the skill's stated purpose.",
     "P5": "Remove all content that could lead to harmful outcomes. Add safety guardrails and human oversight for any high-risk operations.",
+    "P9": "Remove the large whitespace padding (blank-line blocks or long space runs) and review any content hidden below or to the right of it. Keep skill files compact and reviewable so no instructions can be concealed off-screen.",
     "E1": "Verify the destination URL is trusted and necessary. Remove or replace with documented APIs. Ensure no secrets, tokens, or PII are transmitted.",
     "E2": "Avoid reading sensitive env vars (API keys, tokens) unless strictly required. Use secrets managers or secure config. Never log or transmit credentials.",
     "E3": "Remove unnecessary filesystem scanning. If file access is needed, use explicit, scoped paths. Avoid reading ~/.ssh, ~/.aws, or credential directories.",

diff --git a/src/skillspector/nodes/analyzers/static_patterns_prompt_injection.py b/src/skillspector/nodes/analyzers/static_patterns_prompt_injection.py
@@ -13,10 +13,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""Static patterns: prompt injection (P1–P4). Node and analyze() in one module."""
+"""Static patterns: prompt injection (P1–P4, P9). Node and analyze() in one module."""
 
 from __future__ import annotations
 
+import fnmatch
 import re
 import sys
 
@@ -27,11 +28,34 @@
 from . import static_runner
 from .common import get_context, get_line_number
 from .pattern_defaults import PatternCategory
+from .whitespace_padding import (
+    VERTICAL_HIGH_SEVERITY_LINES,
+    ZERO_WIDTH_CHARS,
+    detect_whitespace_padding,
+)
 
 logger = get_logger(__name__)
 
 ANALYZER_ID = "static_patterns_prompt_injection"
 
+# Basename globs of generated/vendored files that legitimately carry large
+# whitespace runs; only the P9 whitespace-padding signal is skipped for them.
+_P9_SKIP_GLOBS = (
+    "*.min.js",
+    "*.min.css",
+    "*.lock",
+    "package-lock.json",
+    "yarn.lock",  # also matched by "*.lock"; listed explicitly
+    "*.svg",
+    "*.map",
+)
+
+
+def _is_p9_skipped_path(file_path: str) -> bool:
+    """Return True if *file_path*'s basename matches a P9 skip glob (case-sensitive on all OSes)."""
+    name = file_path.replace("\\", "/").rsplit("/", 1)[-1]  # basename, either separator
+    return any(fnmatch.fnmatchcase(name, pattern) for pattern in _P9_SKIP_GLOBS)
+
 # P1: Instruction Override
 P1_PATTERNS = [
     (r"ignore\s+(?:all\s+)?previous\s+instructions?", 0.8),
@@ -50,7 +74,9 @@
 P2_PATTERNS = [
     (r"<!--.*?(?:system|instructions?|ignore|POST|GET|send|transmit).*?-->", 0.7),
     (r"\[//\]:\s*#\s*\(.*?(?:system|instructions?|ignore|POST|GET|send|transmit).*?\)", 0.8),
-    (r"[\u200b\u200c\u200d\u2060\ufeff]", 0.6),
+    # Zero-width character class built from the shared ZERO_WIDTH_CHARS constant so
+    # P2 and the P9 detector cannot drift (same five code points as before).
+    ("[" + "".join(sorted(ZERO_WIDTH_CHARS)) + "]", 0.6),
     (r"data:text/plain;base64,[A-Za-z0-9+/=]{50,}", 0.7),
 ]
 # P3: Exfiltration Commands
@@ -117,7 +143,7 @@
 
 
 def analyze(content: str, file_path: str, file_type: str) -> list[AnalyzerFinding]:
-    """Analyze content for prompt injection patterns (P1–P4)."""
+    """Analyze content for prompt injection patterns (P1–P4, P9)."""
     findings: list[AnalyzerFinding] = []
 
     def loc(ln: int) -> Location:
@@ -189,6 +215,35 @@ def ctx(start: int) -> str:
                     matched_text=match.group(0)[:200],
                 )
             )
+    # P9: Whitespace Padding (skipped for generated/vendored files).
+    if not _is_p9_skipped_path(file_path):
+        for run in detect_whitespace_padding(content, file_type=file_type):
+            if run.kind == "vertical":
+                confidence = 0.8 if run.followed_by_content else 0.6
+                severity = (
+                    Severity.HIGH
+                    if run.followed_by_content
+                    and run.length >= VERTICAL_HIGH_SEVERITY_LINES
+                    else Severity.MEDIUM
+                )
+            elif run.kind == "horizontal":
+                confidence = 0.7
+                severity = Severity.MEDIUM
+            else:  # "block" or "ratio"
+                confidence = 0.4
+                severity = Severity.LOW
+            findings.append(
+                AnalyzerFinding(
+                    rule_id="P9",
+                    message="Whitespace Padding",
+                    severity=severity,
+                    location=loc(run.start_line),
+                    confidence=confidence,
+                    tags=tag,
+                    context=ctx(run.start_offset),
+                    matched_text=run.summary,
+                )
+            )
     return findings