Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ skillspector scan https://github.com/user/my-skill

# Scan a zip file
skillspector scan ./my-skill.zip

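# Exclude files by glob (repeatable). Useful for binary assets that
# can trip up the regex scanner with false positives. Patterns are matched
# against each file's path relative to the scan root using fnmatch rules,
# so '*' also matches '/' and '*.pdf' excludes PDFs at any depth.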
skillspector scan ./my-skill/ --exclude '*.pdf' --exclude 'assets/*'
```

### Output Formats
Expand Down Expand Up @@ -360,6 +364,7 @@ Options:
-f, --format [terminal|json|markdown|sarif] Output format [default: terminal]
-o, --output PATH Output file path
--no-llm Skip LLM analysis (static only)
--exclude TEXT Glob (relative to scan root) to exclude. Repeatable.
-V, --verbose Show detailed progress
--help Show this message and exit
```
Expand Down
18 changes: 17 additions & 1 deletion src/skillspector/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ def _scan_state(
format: FormatChoice,
no_llm: bool,
yara_rules_dir: str | None = None,
exclude_patterns: list[str] | None = None,
) -> dict[str, object]:
"""Build initial graph state from scan CLI args."""
state: dict[str, object] = {
Expand All @@ -100,6 +101,8 @@ def _scan_state(
}
if yara_rules_dir is not None:
state["yara_rules_dir"] = yara_rules_dir
if exclude_patterns:
state["exclude_patterns"] = list(exclude_patterns)
return state


Expand Down Expand Up @@ -171,6 +174,13 @@ def scan(
help="Directory containing additional YARA rule files (.yar/.yara) to load alongside built-in rules.",
),
] = None,
exclude: Annotated[
list[str] | None,
typer.Option(
"--exclude",
help="Glob pattern (relative to scan root) to exclude from the scan. Repeatable.",
),
] = None,
verbose: Annotated[
bool,
typer.Option(
Expand Down Expand Up @@ -208,7 +218,13 @@ def scan(
result = None
try:
yara_dir = str(yara_rules_dir.resolve()) if yara_rules_dir else None
state = _scan_state(input_path, format, no_llm, yara_rules_dir=yara_dir)
state = _scan_state(
input_path,
format,
no_llm,
yara_rules_dir=yara_dir,
exclude_patterns=exclude,
)
if verbose:
set_level("DEBUG")
console.print("[dim]Running scan...[/dim]")
Expand Down
17 changes: 14 additions & 3 deletions src/skillspector/nodes/build_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

from __future__ import annotations

import fnmatch
import re
from pathlib import Path

Expand Down Expand Up @@ -72,11 +73,14 @@ def _resolve_skill_dir(state: SkillspectorState) -> Path | None:
return resolved


def _walk_skill_files(skill_dir: Path) -> list[str]:
def _walk_skill_files(skill_dir: Path, exclude_patterns: list[str] | None = None) -> list[str]:
"""Walk skill directory and return sorted relative path strings.

Skips _SKIP_DIRS and hidden files except those starting with .claude.
Also skips paths matching any glob in ``exclude_patterns`` (fnmatch
semantics, matched against the path relative to ``skill_dir``).
"""
patterns = exclude_patterns or []
paths: list[str] = []
for item in skill_dir.rglob("*"):
if not item.is_file():
Expand All @@ -87,10 +91,15 @@ def _walk_skill_files(skill_dir: Path) -> list[str]:
continue
try:
rel = item.relative_to(skill_dir)
paths.append(str(rel))
except ValueError:
logger.debug("Skipping path (not under skill_dir): %s", item)
continue
rel_str = rel.as_posix()
matched = next((p for p in patterns if fnmatch.fnmatch(rel_str, p)), None)
if matched is not None:
logger.debug("Excluded by --exclude %r: %s", matched, rel_str)
continue
paths.append(str(rel))
paths.sort()
return paths

Expand Down Expand Up @@ -231,7 +240,9 @@ def build_context(state: SkillspectorState) -> dict[str, object]:
logger.debug("skill_path missing or not a directory; returning minimal context")
return _minimal_update()

components = _walk_skill_files(skill_dir)
raw_patterns = state.get("exclude_patterns") or []
exclude_patterns = [p for p in raw_patterns if isinstance(p, str) and p]
components = _walk_skill_files(skill_dir, exclude_patterns)
file_cache = _read_file_cache(skill_dir, components)
manifest = _parse_manifest(skill_dir)
component_metadata, has_executable_scripts = _build_component_metadata(skill_dir, components)
Expand Down
3 changes: 3 additions & 0 deletions src/skillspector/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ class SkillspectorState(TypedDict, total=False):
# Additional YARA rules directory (user-specified via --yara-rules-dir)
yara_rules_dir: str | None

# Glob patterns (relative to scan root) to exclude from scanning (--exclude)
exclude_patterns: list[str]


# Node IDs that use an LLM. Each such node should check use_llm at the top and return
# immediately (e.g. fallback / no-op) when False; no graph-level routing.
Expand Down
58 changes: 58 additions & 0 deletions tests/nodes/test_build_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,3 +185,61 @@ def test_build_context_skill_md_lowercase(tmp_path: Path) -> None:
assert result["manifest"]["description"] == "d"
assert "skill.md" in result["components"]
assert "references/guide.md" in result["components"]


def test_build_context_exclude_glob_filters_components(tmp_path: Path) -> None:
    """exclude_patterns drops matching files from components, file_cache, and metadata.

    Two PDFs are planted, one nested under ``assets/`` and one at the skill
    root, and both must be filtered by the single ``*.pdf`` pattern. Every
    excluded path is checked against all three outputs so a regression in
    any one of them is caught for both the nested and the top-level file.
    """
    _make_skill_spec_dir(tmp_path)
    # Assumes the fixture helper already created ``assets/`` (another test in
    # this module expects ``assets/icon.png`` from it).
    (tmp_path / "assets" / "template-style.pdf").write_bytes(b"%PDF-1.4\nshell=True\n%%EOF")
    (tmp_path / "notes.pdf").write_bytes(b"%PDF-1.4 trailing")
    excluded_paths = ("assets/template-style.pdf", "notes.pdf")

    state: SkillspectorState = {
        "skill_path": str(tmp_path),
        "exclude_patterns": ["*.pdf"],
    }
    result = build_context(state)

    components = result["components"]
    file_cache = result["file_cache"]
    metadata_paths = [m.get("path") for m in result["component_metadata"]]

    # Non-matching files must survive the filter.
    assert "SKILL.md" in components
    assert "scripts/run.py" in components

    # Matching files must be absent from every derived structure.
    for rel_path in excluded_paths:
        assert rel_path not in components
        assert rel_path not in file_cache
        assert rel_path not in metadata_paths


def test_build_context_exclude_directory_pattern(tmp_path: Path) -> None:
    """An ``assets/*`` pattern removes every component under ``assets/``.

    Files outside that directory (here ``scripts/run.py``) must still be
    reported.
    """
    _make_skill_spec_dir(tmp_path)
    scan_state: SkillspectorState = {
        "exclude_patterns": ["assets/*"],
        "skill_path": str(tmp_path),
    }

    remaining = build_context(scan_state)["components"]

    leaked_assets = [path for path in remaining if path.startswith("assets/")]
    assert leaked_assets == []
    assert "scripts/run.py" in remaining


def test_build_context_exclude_no_match_keeps_all(tmp_path: Path) -> None:
    """Patterns that don't match anything leave the components list untouched.

    The result is compared against a baseline run with no patterns at all,
    so any file wrongly dropped by a non-matching pattern fails the test,
    not only the two files that are spot-checked by name.
    """
    _make_skill_spec_dir(tmp_path)

    baseline_state: SkillspectorState = {"skill_path": str(tmp_path)}
    baseline = build_context(baseline_state)

    state: SkillspectorState = {
        "skill_path": str(tmp_path),
        "exclude_patterns": ["*.xyz"],
    }
    result = build_context(state)

    assert "SKILL.md" in result["components"]
    assert "assets/icon.png" in result["components"]
    # "Untouched" means identical to the unfiltered walk, entry for entry.
    assert result["components"] == baseline["components"]


def test_build_context_exclude_everything_yields_empty(tmp_path: Path) -> None:
    """A catch-all ``*`` pattern is accepted and leaves nothing to scan.

    All three per-file outputs (components, file_cache, component_metadata)
    must come back empty rather than raising.
    """
    _make_skill_spec_dir(tmp_path)
    catch_all_state: SkillspectorState = {
        "exclude_patterns": ["*"],
        "skill_path": str(tmp_path),
    }

    context = build_context(catch_all_state)

    expected_empty = {
        "components": [],
        "file_cache": {},
        "component_metadata": [],
    }
    for key, empty_value in expected_empty.items():
        assert context[key] == empty_value
88 changes: 88 additions & 0 deletions tests/unit/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

"""Tests for skillspector CLI (skillspector scan, --version)."""

import json
from pathlib import Path

from typer.testing import CliRunner
Expand All @@ -24,6 +25,15 @@
runner = CliRunner()


# Minimal PDF-like bytes containing a TM1 trigger (shell=True). The static
# pattern scanner reads files with utf-8 + errors='replace', so binary assets
# can match regex patterns and produce spurious HIGH findings — which is
# exactly the false positive --exclude is meant to suppress.
_PDF_WITH_TM1 = (
b"%PDF-1.4\n1 0 obj<</Type/Catalog>>endobj\n% subprocess.run(cmd, shell=True)\n%%EOF\n"
)


def test_cli_version() -> None:
"""--version prints version and exits 0."""
result = runner.invoke(app, ["--version"])
Expand Down Expand Up @@ -67,3 +77,81 @@ def test_cli_scan_nonexistent_exits_2() -> None:
result = runner.invoke(app, ["scan", "/nonexistent/path/xyz"])
assert result.exit_code == 2
assert "Error" in result.output or "error" in result.output.lower()


def _make_pdf_fixture_skill(root: Path) -> Path:
    """Build ``root/skill`` containing a SKILL.md and one trigger-bearing PDF.

    The PDF is written to ``assets/template-style.pdf`` with the
    ``_PDF_WITH_TM1`` payload and is the only file besides SKILL.md.

    Returns the path of the created skill directory.
    """
    skill_dir = root / "skill"
    assets_dir = skill_dir / "assets"
    # parents=True creates skill_dir as well; no exist_ok, so reusing a root
    # that already holds this fixture raises.
    assets_dir.mkdir(parents=True)

    manifest_text = "---\nname: exclude-test\n---\n# Skill\n"
    (skill_dir / "SKILL.md").write_text(manifest_text, encoding="utf-8")
    (assets_dir / "template-style.pdf").write_bytes(_PDF_WITH_TM1)
    return skill_dir


def test_cli_scan_exclude_drops_pdf_from_components_and_findings(tmp_path: Path) -> None:
    """With ``--exclude '*.pdf'`` the PDF is neither listed nor reported against.

    NOTE(review): both lookups use ``.get(..., [])``, so a report that lacks
    a ``components`` or ``issues`` key passes vacuously. Confirm against the
    JSON report schema.
    """
    pdf_rel_path = "assets/template-style.pdf"
    fixture_dir = _make_pdf_fixture_skill(tmp_path)
    argv = ["scan", str(fixture_dir), "--format", "json", "--no-llm", "--exclude", "*.pdf"]

    outcome = runner.invoke(app, argv)
    assert outcome.exit_code == 0, outcome.output

    payload = json.loads(outcome.output)
    listed_paths = [entry.get("path") for entry in payload.get("components", [])]
    assert pdf_rel_path not in listed_paths

    flagged_files = [
        issue.get("location", {}).get("file") for issue in payload.get("issues", [])
    ]
    assert pdf_rel_path not in flagged_files


def test_cli_scan_exclude_repeatable(tmp_path: Path) -> None:
    """Passing ``--exclude`` twice applies both patterns to the same scan.

    A text file is added next to the fixture PDF; each file is matched by
    exactly one of the two patterns and both must be missing from the report.
    """
    fixture_dir = _make_pdf_fixture_skill(tmp_path)
    (fixture_dir / "notes.txt").write_text("plain text", encoding="utf-8")

    argv = ["scan", str(fixture_dir), "--format", "json", "--no-llm"]
    for pattern in ("*.pdf", "*.txt"):
        argv += ["--exclude", pattern]

    outcome = runner.invoke(app, argv)
    assert outcome.exit_code == 0, outcome.output

    payload = json.loads(outcome.output)
    listed_paths = [entry.get("path") for entry in payload.get("components", [])]
    for excluded in ("assets/template-style.pdf", "notes.txt"):
        assert excluded not in listed_paths


def test_cli_scan_exclude_everything_succeeds(tmp_path: Path) -> None:
    """``--exclude '*'`` filters every file yet the scan still exits 0.

    The JSON report must then contain no components and no issues.
    """
    fixture_dir = _make_pdf_fixture_skill(tmp_path)
    argv = ["scan", str(fixture_dir), "--format", "json", "--no-llm", "--exclude", "*"]

    outcome = runner.invoke(app, argv)
    assert outcome.exit_code == 0, outcome.output

    payload = json.loads(outcome.output)
    for section in ("components", "issues"):
        assert payload.get(section, []) == []