From c174662308d12abe9544aee275a8b2ad437c2bf1 Mon Sep 17 00:00:00 2001 From: seonghobae <8172694+seonghobae@users.noreply.github.com> Date: Tue, 30 Jun 2026 14:29:48 +0000 Subject: [PATCH] =?UTF-8?q?=F0=9F=9B=A1=EF=B8=8F=20Sentinel:=20[CRITICAL]?= =?UTF-8?q?=20Fix=20Server-Side=20Request=20Forgery=20(SSRF)=20detection?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .jules/sentinel.md | 5 ++ scanner/cli/appguardrail.py | 60 ++++++++++++------ tests/test_appguardrail.py | 119 ++++++++++++++++++++++++------------ 3 files changed, 127 insertions(+), 57 deletions(-) diff --git a/.jules/sentinel.md b/.jules/sentinel.md index 69cb762..662e4da 100644 --- a/.jules/sentinel.md +++ b/.jules/sentinel.md @@ -56,3 +56,8 @@ **Vulnerability:** The CLI file scanner `vibesec scan` lacked detection rules for insecure deserialization, such as `pickle.load` or `yaml.load` in Python, which could lead to arbitrary code execution. **Learning:** Adding regular expressions that detect known dangerous serialization libraries prevents severe security vulnerabilities when parsing untrusted data. **Prevention:** A new scanner rule `python-insecure-deserialization` was added to `SCAN_RULES` to flag `pickle.load(s)`, `yaml.load`, and `marshal.load(s)`. + +## 2026-06-25 - Expand Scanner Rules for SSRF +**Vulnerability:** The AppGuardrail static analysis scanner lacked explicit detection for Server-Side Request Forgery (SSRF) risks caused by dynamically constructing URLs using untrusted inputs (e.g., Python `requests.get(f"...{var}...")` or Node `fetch(`...${var}...`)`). +**Learning:** SSRF is a critical OWASP Top 10 vulnerability (A10:2021). Dynamic URL construction in network requests without validation is a common security failure, especially in code generated by AI that may not anticipate the network request being manipulated to reach internal systems. +**Prevention:** A new rule `ssrf-risk` was added to `SCAN_RULES` to flag unsafe usage of network fetching functions (e.g., `fetch`, `axios`, `requests.get`, `urllib.request.urlopen`) when used with string concatenation, f-strings, or template literals. diff --git a/scanner/cli/appguardrail.py b/scanner/cli/appguardrail.py index 7540117..917fbee 100644 --- a/scanner/cli/appguardrail.py +++ b/scanner/cli/appguardrail.py @@ -35,8 +35,8 @@ import json import os import re -import shutil import shlex +import shutil import stat import subprocess import sys @@ -371,6 +371,15 @@ "message": "Potential Command Injection detected: string concatenation or template literal in child_process execution. [OWASP A03:2021 - Injection]", "extensions": [".ts", ".tsx", ".js", ".jsx"], }, + { + "id": "ssrf-risk", + "pattern": re.compile( + r"(?is)\b(?:fetch|axios(?:\.\w+)?|urllib(?:3)?\.(?:request\.)?urlopen|requests\.(?:get|post|put|delete|patch|request)|http\.client\.HTTPSConnection)\s*\(\s*(?:`[^`]*\$\{[^}]+\}[^`]*`|f[\"'][^\"']*\{[^}]+\}[^\"']*[\"']|[\"'].*?[\"']\s*\+\s*[a-zA-Z0-9_]+)" + ), + "severity": "CRITICAL", + "message": "Potential Server-Side Request Forgery (SSRF) detected: dynamic URL construction in network request. Validate and sanitize URLs before fetching. [OWASP A10:2021 - Server-Side Request Forgery (SSRF)]", + "extensions": [".ts", ".tsx", ".js", ".jsx", ".py"], + }, { "id": "python-command-injection", "pattern": re.compile( @@ -670,7 +679,9 @@ def finish_rule(): path_mode = None continue if raw_line.startswith(" severity: "): - current["severity"] = _unquote_rule_scalar(raw_line.split(":", 1)[1]).upper() + current["severity"] = _unquote_rule_scalar( + raw_line.split(":", 1)[1] + ).upper() path_mode = None continue if raw_line.startswith(" languages: "): @@ -891,7 +902,9 @@ def cmd_init(args): selected_tools = tool_groups.get(tool, [tool]) - unknown_tools = [selected for selected in selected_tools if selected not in tool_configs] + unknown_tools = [ + selected for selected in selected_tools if selected not in tool_configs + ] if unknown_tools: print(f"āŒ Error: Unknown tool '{tool}'", file=sys.stderr) print( @@ -1079,7 +1092,9 @@ def cmd_monitor(args): print("\nāœ… AppGuardrail monitor workflow installed!\n") print(f"Created/updated: {workflow_file.relative_to(project_root)}") print() - print("This workflow runs `appguardrail scan .` on pull requests, pushes, and manual dispatches.") + print( + "This workflow runs `appguardrail scan .` on pull requests, pushes, and manual dispatches." + ) return 0 @@ -1205,7 +1220,9 @@ def _path_matches_glob(path: str, pattern: str) -> bool: def _path_allowed_by_rule(path: str, include_paths, exclude_paths) -> bool: """Return whether a path passes optional YAML include/exclude filters.""" - if include_paths and not any(_path_matches_glob(path, glob) for glob in include_paths): + if include_paths and not any( + _path_matches_glob(path, glob) for glob in include_paths + ): return False if exclude_paths and any(_path_matches_glob(path, glob) for glob in exclude_paths): return False @@ -1229,12 +1246,9 @@ def _collect_files(base_path: Path): if entry.is_symlink(): continue if entry.is_dir(follow_symlinks=False): - if ( - entry.name not in SKIP_DIRS - and ( - not entry.name.startswith(".") - or entry.name in SECURITY_HIDDEN_DIRS - ) + if entry.name not in SKIP_DIRS and ( + not entry.name.startswith(".") + or entry.name in SECURITY_HIDDEN_DIRS ): dirs.append(entry.path) elif entry.is_file(follow_symlinks=False): @@ -1332,7 +1346,9 @@ def _finding_category(rule_id: str) -> str: return "payment" if "firebase" in rule or "supabase" in rule or "storage" in rule: return "storage" - if any(token in rule for token in ("auth", "session", "admin", "route-without-auth")): + if any( + token in rule for token in ("auth", "session", "admin", "route-without-auth") + ): return "authz" if any( token in rule @@ -1515,7 +1531,9 @@ def _run_codegraph_command(command, cwd: Path, action: str): f"CodeGraph command argument must be a string, got {type(arg).__name__}." ) if not arg.isprintable(): - raise RuntimeError("CodeGraph command argument contains control characters.") + raise RuntimeError( + "CodeGraph command argument contains control characters." + ) executable = Path(command[0]).name allowed_args = {("sync",), ("init", "-i"), ("status",)} @@ -1550,7 +1568,9 @@ def _run_codegraph_index(scan_path: Path): codegraph_dir = workdir / ".codegraph" if codegraph_dir.exists() and not codegraph_dir.is_dir(): - raise RuntimeError(f"CodeGraph path exists but is not a directory: {codegraph_dir}") + raise RuntimeError( + f"CodeGraph path exists but is not a directory: {codegraph_dir}" + ) if codegraph_dir.is_dir(): _run_codegraph_command([codegraph, "sync"], workdir, "sync") else: @@ -1613,7 +1633,9 @@ def _scan_file(file_path: Path, base_path: Path): base_path if base_path.is_dir() else Path(".").resolve() ) except ValueError: - rel_path = file_path.name if base_path.is_file() else file_path + rel_path = ( + file_path.name if base_path.is_file() else file_path + ) rel_path_for_filters = str(rel_path) if not _path_allowed_by_rule( rel_path_for_filters, include_paths, exclude_paths @@ -1626,7 +1648,9 @@ def _scan_file(file_path: Path, base_path: Path): base_path if base_path.is_dir() else Path(".").resolve() ) except ValueError: - rel_path = file_path.name if base_path.is_file() else file_path + rel_path = ( + file_path.name if base_path.is_file() else file_path + ) rel_path_str = _sanitize_terminal_output(str(rel_path)) start_idx = match.start() @@ -1716,7 +1740,9 @@ def _print_scan_results(findings, files_scanned): print("\nāœ… No deploy-blocking critical or high issues found.") if findings: - print("\nšŸ’” Run 'appguardrail review' to get an AI prompt for fixing these issues.") + print( + "\nšŸ’” Run 'appguardrail review' to get an AI prompt for fixing these issues." + ) print() diff --git a/tests/test_appguardrail.py b/tests/test_appguardrail.py index 999445b..1136f1e 100644 --- a/tests/test_appguardrail.py +++ b/tests/test_appguardrail.py @@ -6,20 +6,14 @@ import pytest -from scanner.cli.appguardrail import ( - SCAN_RULES, - _collect_files, - _load_packaged_regex_rules, - _path_allowed_by_rule, - _print_scan_results, - _run_codegraph_command, - _run_codegraph_index, - _run_trivy_fs, - _scan_file, - cmd_init, - cmd_monitor, - cmd_scan, -) +from scanner.cli.appguardrail import (SCAN_RULES, _collect_files, + _load_packaged_regex_rules, + _path_allowed_by_rule, + _print_scan_results, + _run_codegraph_command, + _run_codegraph_index, _run_trivy_fs, + _scan_file, cmd_init, cmd_monitor, + cmd_scan) MOCK_RULES = [ { @@ -65,8 +59,10 @@ class MonitorArgs: def _create_symlink(target, link, target_is_directory=False): try: link.symlink_to(target, target_is_directory=target_is_directory) - except (NotImplementedError, OSError) as exc: # pragma: no cover - pytest.skip(f"symlinks are not available in this environment: {exc}") # pragma: no cover + except (NotImplementedError, OSError) as exc: # pragma: no cover + pytest.skip( + f"symlinks are not available in this environment: {exc}" + ) # pragma: no cover def test_scan_file_error_handling(tmp_path): @@ -148,11 +144,11 @@ def test_scan_file_detects_strix_derived_patterns(tmp_path): "ids": {"python-jwt-decode-without-algorithms"}, }, "data.py": { - "content": "cursor.execute(f\"SELECT * FROM users WHERE name = {name}\")\n", + "content": 'cursor.execute(f"SELECT * FROM users WHERE name = {name}")\n', "ids": {"python-dynamic-sql"}, }, "media.py": { - "content": "subprocess.run(f\"ffmpeg -i {source_path}\")\n", + "content": 'subprocess.run(f"ffmpeg -i {source_path}")\n', "ids": {"python-subprocess-string-command"}, }, "api.py": { @@ -478,9 +474,10 @@ def test_run_trivy_fs_maps_json_findings(tmp_path): "Process", (), {"returncode": 0, "stdout": json.dumps(report), "stderr": ""} )() - with patch( - "scanner.cli.appguardrail.shutil.which", return_value="/usr/bin/trivy" - ), patch("scanner.cli.appguardrail.subprocess.run", return_value=process) as run: + with ( + patch("scanner.cli.appguardrail.shutil.which", return_value="/usr/bin/trivy"), + patch("scanner.cli.appguardrail.subprocess.run", return_value=process) as run, + ): findings = _run_trivy_fs(tmp_path) assert run.call_args.args[0][:2] == ["/usr/bin/trivy", "fs"] @@ -503,10 +500,14 @@ def test_run_trivy_fs_maps_json_findings(tmp_path): def test_run_trivy_fs_passes_scan_path_as_literal_argument(tmp_path): scan_path = tmp_path / "literal;touch INJECTED" scan_path.mkdir() - process = type("Process", (), {"returncode": 0, "stdout": json.dumps({}), "stderr": ""})() + process = type( + "Process", (), {"returncode": 0, "stdout": json.dumps({}), "stderr": ""} + )() - with patch("scanner.cli.appguardrail.shutil.which", return_value="/usr/bin/trivy"), \ - patch("scanner.cli.appguardrail.subprocess.run", return_value=process) as run: + with ( + patch("scanner.cli.appguardrail.shutil.which", return_value="/usr/bin/trivy"), + patch("scanner.cli.appguardrail.subprocess.run", return_value=process) as run, + ): assert _run_trivy_fs(scan_path) == [] command = run.call_args.args[0] @@ -526,12 +527,15 @@ def test_run_codegraph_index_initializes_when_missing(tmp_path): )() init_process = type("Process", (), {"returncode": 0, "stdout": "", "stderr": ""})() - with patch( - "scanner.cli.appguardrail.shutil.which", return_value="/usr/bin/codegraph" - ), patch( - "scanner.cli.appguardrail.subprocess.run", - side_effect=[init_process, status_process], - ) as run: + with ( + patch( + "scanner.cli.appguardrail.shutil.which", return_value="/usr/bin/codegraph" + ), + patch( + "scanner.cli.appguardrail.subprocess.run", + side_effect=[init_process, status_process], + ) as run, + ): assert _run_codegraph_index(tmp_path) == "Index is up to date" assert run.call_args_list[0].args[0] == ["/usr/bin/codegraph", "init", "-i"] @@ -545,12 +549,15 @@ def test_run_codegraph_index_syncs_existing_index(tmp_path): )() sync_process = type("Process", (), {"returncode": 0, "stdout": "", "stderr": ""})() - with patch( - "scanner.cli.appguardrail.shutil.which", return_value="/usr/bin/codegraph" - ), patch( - "scanner.cli.appguardrail.subprocess.run", - side_effect=[sync_process, status_process], - ) as run: + with ( + patch( + "scanner.cli.appguardrail.shutil.which", return_value="/usr/bin/codegraph" + ), + patch( + "scanner.cli.appguardrail.subprocess.run", + side_effect=[sync_process, status_process], + ) as run, + ): assert _run_codegraph_index(tmp_path) == "Index is up to date" assert run.call_args_list[0].args[0] == ["/usr/bin/codegraph", "sync"] @@ -566,7 +573,9 @@ def test_run_codegraph_index_requires_codegraph(tmp_path): def test_run_codegraph_index_rejects_file_at_index_path(tmp_path): (tmp_path / ".codegraph").write_text("not a directory") - with patch("scanner.cli.appguardrail.shutil.which", return_value="/usr/bin/codegraph"): + with patch( + "scanner.cli.appguardrail.shutil.which", return_value="/usr/bin/codegraph" + ): with pytest.raises(RuntimeError, match="not a directory"): _run_codegraph_index(tmp_path) @@ -688,7 +697,9 @@ def test_cmd_scan_does_not_block_embedded_scanner_rule_fixtures(tmp_path, capsys assert "šŸ”“ 0 critical issues" in out -def test_cmd_scan_single_file_keeps_scanner_fixture_context(tmp_path, monkeypatch, capsys): +def test_cmd_scan_single_file_keeps_scanner_fixture_context( + tmp_path, monkeypatch, capsys +): monkeypatch.chdir(tmp_path) scanner_cli = tmp_path / "scanner" / "cli" scanner_cli.mkdir(parents=True) @@ -938,7 +949,10 @@ def test_cmd_init_unknown_tool(tmp_path, monkeypatch, capsys): assert excinfo.value.code == 1 captured = capsys.readouterr() assert "Error: Unknown tool 'invalid-tool'" in captured.err - assert "Supported tools are auto, cursor, codex, copilot, claude-code, windsurf, lovable" in captured.err + assert ( + "Supported tools are auto, cursor, codex, copilot, claude-code, windsurf, lovable" + in captured.err + ) def test_cmd_init_supabase_stack(tmp_path, monkeypatch, capsys): @@ -967,6 +981,7 @@ def test_sanitize_terminal_output(): # Test non-strings assert _sanitize_terminal_output(None) is None + def test_scan_file_insecure_deserialization(tmp_path): test_file = tmp_path / "unsafe.py" test_file.write_text( @@ -981,7 +996,8 @@ def test_scan_file_insecure_deserialization(tmp_path): ) findings = [ - finding for finding in _scan_file(test_file, tmp_path) + finding + for finding in _scan_file(test_file, tmp_path) if finding["rule_id"] == "python-insecure-deserialization" ] @@ -993,6 +1009,7 @@ def test_scan_file_insecure_deserialization(tmp_path): assert any("marshal.load" in finding["snippet"] for finding in findings) assert all("yaml.safe_load" not in finding["snippet"] for finding in findings) + def test_cmd_init_checklist_skipped(tmp_path, monkeypatch, capsys): monkeypatch.chdir(tmp_path) checklist_file = tmp_path / "APPGUARDRAIL_CHECKLIST.md" @@ -1004,3 +1021,25 @@ def test_cmd_init_checklist_skipped(tmp_path, monkeypatch, capsys): out = capsys.readouterr().out assert "Skipped (already configured):" in out assert "APPGUARDRAIL_CHECKLIST.md" in out + + +def test_ssrf_risk_rule(tmp_path): + from scanner.cli.appguardrail import SCAN_RULES, _scan_file + + test_file_js = tmp_path / "unsafe_fetch.js" + test_file_js.write_text("fetch(`https://api.example.com/user/${userId}`);\n") + + test_file_py = tmp_path / "unsafe_requests.py" + test_file_py.write_text('requests.get(f"https://api.example.com/{user_id}")\n') + + test_file_safe_js = tmp_path / "safe_fetch.js" + test_file_safe_js.write_text("fetch('https://api.example.com/users');\n") + + findings_js = _scan_file(test_file_js, tmp_path) + assert any(f["rule_id"] == "ssrf-risk" for f in findings_js) + + findings_py = _scan_file(test_file_py, tmp_path) + assert any(f["rule_id"] == "ssrf-risk" for f in findings_py) + + findings_safe_js = _scan_file(test_file_safe_js, tmp_path) + assert not any(f["rule_id"] == "ssrf-risk" for f in findings_safe_js)