From c174662308d12abe9544aee275a8b2ad437c2bf1 Mon Sep 17 00:00:00 2001
From: seonghobae <8172694+seonghobae@users.noreply.github.com>
Date: Tue, 30 Jun 2026 14:29:48 +0000
Subject: [PATCH] =?UTF-8?q?=F0=9F=9B=A1=EF=B8=8F=20Sentinel:=20[CRITICAL]?=
 =?UTF-8?q?=20Fix=20Server-Side=20Request=20Forgery=20(SSRF)=20detection?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .jules/sentinel.md          |   5 ++
 scanner/cli/appguardrail.py |  60 ++++++++++++------
 tests/test_appguardrail.py  | 119 ++++++++++++++++++++++++------------
 3 files changed, 127 insertions(+), 57 deletions(-)

diff --git a/.jules/sentinel.md b/.jules/sentinel.md
index 69cb762..662e4da 100644
--- a/.jules/sentinel.md
+++ b/.jules/sentinel.md
@@ -56,3 +56,8 @@
 **Vulnerability:** The CLI file scanner `vibesec scan` lacked detection rules for insecure deserialization, such as `pickle.load` or `yaml.load` in Python, which could lead to arbitrary code execution.
 **Learning:** Adding regular expressions that detect known dangerous serialization libraries prevents severe security vulnerabilities when parsing untrusted data.
 **Prevention:** A new scanner rule `python-insecure-deserialization` was added to `SCAN_RULES` to flag `pickle.load(s)`, `yaml.load`, and `marshal.load(s)`.
+
+## 2026-06-25 - Expand Scanner Rules for SSRF
+**Vulnerability:** The AppGuardrail static analysis scanner lacked explicit detection for Server-Side Request Forgery (SSRF) risks caused by dynamically constructing URLs using untrusted inputs (e.g., Python `requests.get(f"...{var}...")` or Node `fetch(`...${var}...`)`).
+**Learning:** SSRF is a critical OWASP Top 10 vulnerability (A10:2021). Dynamic URL construction in network requests without validation is a common security failure, especially in code generated by AI that may not anticipate the network request being manipulated to reach internal systems.
+**Prevention:** A new rule `ssrf-risk` was added to `SCAN_RULES` to flag unsafe usage of network fetching functions (e.g., `fetch`, `axios`, `requests.get`, `urllib.request.urlopen`) when used with string concatenation, f-strings, or template literals.
diff --git a/scanner/cli/appguardrail.py b/scanner/cli/appguardrail.py
index 7540117..917fbee 100644
--- a/scanner/cli/appguardrail.py
+++ b/scanner/cli/appguardrail.py
@@ -35,8 +35,8 @@
 import json
 import os
 import re
-import shutil
 import shlex
+import shutil
 import stat
 import subprocess
 import sys
@@ -371,6 +371,15 @@
         "message": "Potential Command Injection detected: string concatenation or template literal in child_process execution. [OWASP A03:2021 - Injection]",
         "extensions": [".ts", ".tsx", ".js", ".jsx"],
     },
+    {
+        "id": "ssrf-risk",
+        "pattern": re.compile(
+            r"(?is)\b(?:fetch|axios(?:\.\w+)?|urllib(?:3)?\.(?:request\.)?urlopen|requests\.(?:get|post|put|delete|patch|request)|http\.client\.HTTPSConnection)\s*\(\s*(?:`[^`]*\$\{[^}]+\}[^`]*`|f[\"'][^\"']*\{[^}]+\}[^\"']*[\"']|[\"'].*?[\"']\s*\+\s*[a-zA-Z0-9_]+)"
+        ),
+        "severity": "CRITICAL",
+        "message": "Potential Server-Side Request Forgery (SSRF) detected: dynamic URL construction in network request. Validate and sanitize URLs before fetching. [OWASP A10:2021 - Server-Side Request Forgery (SSRF)]",
+        "extensions": [".ts", ".tsx", ".js", ".jsx", ".py"],
+    },
     {
         "id": "python-command-injection",
         "pattern": re.compile(
@@ -670,7 +679,9 @@ def finish_rule():
             path_mode = None
             continue
         if raw_line.startswith("    severity: "):
-            current["severity"] = _unquote_rule_scalar(raw_line.split(":", 1)[1]).upper()
+            current["severity"] = _unquote_rule_scalar(
+                raw_line.split(":", 1)[1]
+            ).upper()
             path_mode = None
             continue
         if raw_line.startswith("    languages: "):
@@ -891,7 +902,9 @@ def cmd_init(args):
 
     selected_tools = tool_groups.get(tool, [tool])
 
-    unknown_tools = [selected for selected in selected_tools if selected not in tool_configs]
+    unknown_tools = [
+        selected for selected in selected_tools if selected not in tool_configs
+    ]
     if unknown_tools:
         print(f"❌ Error: Unknown tool '{tool}'", file=sys.stderr)
         print(
@@ -1079,7 +1092,9 @@ def cmd_monitor(args):
     print("\n✅ AppGuardrail monitor workflow installed!\n")
     print(f"Created/updated: {workflow_file.relative_to(project_root)}")
     print()
-    print("This workflow runs `appguardrail scan .` on pull requests, pushes, and manual dispatches.")
+    print(
+        "This workflow runs `appguardrail scan .` on pull requests, pushes, and manual dispatches."
+    )
     return 0
 
 
@@ -1205,7 +1220,9 @@ def _path_matches_glob(path: str, pattern: str) -> bool:
 
 def _path_allowed_by_rule(path: str, include_paths, exclude_paths) -> bool:
     """Return whether a path passes optional YAML include/exclude filters."""
-    if include_paths and not any(_path_matches_glob(path, glob) for glob in include_paths):
+    if include_paths and not any(
+        _path_matches_glob(path, glob) for glob in include_paths
+    ):
         return False
     if exclude_paths and any(_path_matches_glob(path, glob) for glob in exclude_paths):
         return False
@@ -1229,12 +1246,9 @@ def _collect_files(base_path: Path):
                         if entry.is_symlink():
                             continue
                         if entry.is_dir(follow_symlinks=False):
-                            if (
-                                entry.name not in SKIP_DIRS
-                                and (
-                                    not entry.name.startswith(".")
-                                    or entry.name in SECURITY_HIDDEN_DIRS
-                                )
+                            if entry.name not in SKIP_DIRS and (
+                                not entry.name.startswith(".")
+                                or entry.name in SECURITY_HIDDEN_DIRS
                             ):
                                 dirs.append(entry.path)
                         elif entry.is_file(follow_symlinks=False):
@@ -1332,7 +1346,9 @@ def _finding_category(rule_id: str) -> str:
         return "payment"
     if "firebase" in rule or "supabase" in rule or "storage" in rule:
         return "storage"
-    if any(token in rule for token in ("auth", "session", "admin", "route-without-auth")):
+    if any(
+        token in rule for token in ("auth", "session", "admin", "route-without-auth")
+    ):
         return "authz"
     if any(
         token in rule
@@ -1515,7 +1531,9 @@ def _run_codegraph_command(command, cwd: Path, action: str):
                 f"CodeGraph command argument must be a string, got {type(arg).__name__}."
             )
         if not arg.isprintable():
-            raise RuntimeError("CodeGraph command argument contains control characters.")
+            raise RuntimeError(
+                "CodeGraph command argument contains control characters."
+            )
 
     executable = Path(command[0]).name
     allowed_args = {("sync",), ("init", "-i"), ("status",)}
@@ -1550,7 +1568,9 @@ def _run_codegraph_index(scan_path: Path):
 
     codegraph_dir = workdir / ".codegraph"
     if codegraph_dir.exists() and not codegraph_dir.is_dir():
-        raise RuntimeError(f"CodeGraph path exists but is not a directory: {codegraph_dir}")
+        raise RuntimeError(
+            f"CodeGraph path exists but is not a directory: {codegraph_dir}"
+        )
     if codegraph_dir.is_dir():
         _run_codegraph_command([codegraph, "sync"], workdir, "sync")
     else:
@@ -1613,7 +1633,9 @@ def _scan_file(file_path: Path, base_path: Path):
                                 base_path if base_path.is_dir() else Path(".").resolve()
                             )
                         except ValueError:
-                            rel_path = file_path.name if base_path.is_file() else file_path
+                            rel_path = (
+                                file_path.name if base_path.is_file() else file_path
+                            )
                         rel_path_for_filters = str(rel_path)
                     if not _path_allowed_by_rule(
                         rel_path_for_filters, include_paths, exclude_paths
@@ -1626,7 +1648,9 @@ def _scan_file(file_path: Path, base_path: Path):
                                 base_path if base_path.is_dir() else Path(".").resolve()
                             )
                         except ValueError:
-                            rel_path = file_path.name if base_path.is_file() else file_path
+                            rel_path = (
+                                file_path.name if base_path.is_file() else file_path
+                            )
                         rel_path_str = _sanitize_terminal_output(str(rel_path))
 
                     start_idx = match.start()
@@ -1716,7 +1740,9 @@ def _print_scan_results(findings, files_scanned):
         print("\n✅ No deploy-blocking critical or high issues found.")
 
     if findings:
-        print("\n💡 Run 'appguardrail review' to get an AI prompt for fixing these issues.")
+        print(
+            "\n💡 Run 'appguardrail review' to get an AI prompt for fixing these issues."
+        )
     print()
 
 
diff --git a/tests/test_appguardrail.py b/tests/test_appguardrail.py
index 999445b..1136f1e 100644
--- a/tests/test_appguardrail.py
+++ b/tests/test_appguardrail.py
@@ -6,20 +6,14 @@
 
 import pytest
 
-from scanner.cli.appguardrail import (
-    SCAN_RULES,
-    _collect_files,
-    _load_packaged_regex_rules,
-    _path_allowed_by_rule,
-    _print_scan_results,
-    _run_codegraph_command,
-    _run_codegraph_index,
-    _run_trivy_fs,
-    _scan_file,
-    cmd_init,
-    cmd_monitor,
-    cmd_scan,
-)
+from scanner.cli.appguardrail import (SCAN_RULES, _collect_files,
+                                      _load_packaged_regex_rules,
+                                      _path_allowed_by_rule,
+                                      _print_scan_results,
+                                      _run_codegraph_command,
+                                      _run_codegraph_index, _run_trivy_fs,
+                                      _scan_file, cmd_init, cmd_monitor,
+                                      cmd_scan)
 
 MOCK_RULES = [
     {
@@ -65,8 +59,10 @@ class MonitorArgs:
 def _create_symlink(target, link, target_is_directory=False):
     try:
         link.symlink_to(target, target_is_directory=target_is_directory)
-    except (NotImplementedError, OSError) as exc: # pragma: no cover
-        pytest.skip(f"symlinks are not available in this environment: {exc}") # pragma: no cover
+    except (NotImplementedError, OSError) as exc:  # pragma: no cover
+        pytest.skip(
+            f"symlinks are not available in this environment: {exc}"
+        )  # pragma: no cover
 
 
 def test_scan_file_error_handling(tmp_path):
@@ -148,11 +144,11 @@ def test_scan_file_detects_strix_derived_patterns(tmp_path):
             "ids": {"python-jwt-decode-without-algorithms"},
         },
         "data.py": {
-            "content": "cursor.execute(f\"SELECT * FROM users WHERE name = {name}\")\n",
+            "content": 'cursor.execute(f"SELECT * FROM users WHERE name = {name}")\n',
             "ids": {"python-dynamic-sql"},
         },
         "media.py": {
-            "content": "subprocess.run(f\"ffmpeg -i {source_path}\")\n",
+            "content": 'subprocess.run(f"ffmpeg -i {source_path}")\n',
             "ids": {"python-subprocess-string-command"},
         },
         "api.py": {
@@ -478,9 +474,10 @@ def test_run_trivy_fs_maps_json_findings(tmp_path):
         "Process", (), {"returncode": 0, "stdout": json.dumps(report), "stderr": ""}
     )()
 
-    with patch(
-        "scanner.cli.appguardrail.shutil.which", return_value="/usr/bin/trivy"
-    ), patch("scanner.cli.appguardrail.subprocess.run", return_value=process) as run:
+    with (
+        patch("scanner.cli.appguardrail.shutil.which", return_value="/usr/bin/trivy"),
+        patch("scanner.cli.appguardrail.subprocess.run", return_value=process) as run,
+    ):
         findings = _run_trivy_fs(tmp_path)
 
     assert run.call_args.args[0][:2] == ["/usr/bin/trivy", "fs"]
@@ -503,10 +500,14 @@ def test_run_trivy_fs_maps_json_findings(tmp_path):
 def test_run_trivy_fs_passes_scan_path_as_literal_argument(tmp_path):
     scan_path = tmp_path / "literal;touch INJECTED"
     scan_path.mkdir()
-    process = type("Process", (), {"returncode": 0, "stdout": json.dumps({}), "stderr": ""})()
+    process = type(
+        "Process", (), {"returncode": 0, "stdout": json.dumps({}), "stderr": ""}
+    )()
 
-    with patch("scanner.cli.appguardrail.shutil.which", return_value="/usr/bin/trivy"), \
-         patch("scanner.cli.appguardrail.subprocess.run", return_value=process) as run:
+    with (
+        patch("scanner.cli.appguardrail.shutil.which", return_value="/usr/bin/trivy"),
+        patch("scanner.cli.appguardrail.subprocess.run", return_value=process) as run,
+    ):
         assert _run_trivy_fs(scan_path) == []
 
     command = run.call_args.args[0]
@@ -526,12 +527,15 @@ def test_run_codegraph_index_initializes_when_missing(tmp_path):
     )()
     init_process = type("Process", (), {"returncode": 0, "stdout": "", "stderr": ""})()
 
-    with patch(
-        "scanner.cli.appguardrail.shutil.which", return_value="/usr/bin/codegraph"
-    ), patch(
-        "scanner.cli.appguardrail.subprocess.run",
-        side_effect=[init_process, status_process],
-    ) as run:
+    with (
+        patch(
+            "scanner.cli.appguardrail.shutil.which", return_value="/usr/bin/codegraph"
+        ),
+        patch(
+            "scanner.cli.appguardrail.subprocess.run",
+            side_effect=[init_process, status_process],
+        ) as run,
+    ):
         assert _run_codegraph_index(tmp_path) == "Index is up to date"
 
     assert run.call_args_list[0].args[0] == ["/usr/bin/codegraph", "init", "-i"]
@@ -545,12 +549,15 @@ def test_run_codegraph_index_syncs_existing_index(tmp_path):
     )()
     sync_process = type("Process", (), {"returncode": 0, "stdout": "", "stderr": ""})()
 
-    with patch(
-        "scanner.cli.appguardrail.shutil.which", return_value="/usr/bin/codegraph"
-    ), patch(
-        "scanner.cli.appguardrail.subprocess.run",
-        side_effect=[sync_process, status_process],
-    ) as run:
+    with (
+        patch(
+            "scanner.cli.appguardrail.shutil.which", return_value="/usr/bin/codegraph"
+        ),
+        patch(
+            "scanner.cli.appguardrail.subprocess.run",
+            side_effect=[sync_process, status_process],
+        ) as run,
+    ):
         assert _run_codegraph_index(tmp_path) == "Index is up to date"
 
     assert run.call_args_list[0].args[0] == ["/usr/bin/codegraph", "sync"]
@@ -566,7 +573,9 @@ def test_run_codegraph_index_requires_codegraph(tmp_path):
 def test_run_codegraph_index_rejects_file_at_index_path(tmp_path):
     (tmp_path / ".codegraph").write_text("not a directory")
 
-    with patch("scanner.cli.appguardrail.shutil.which", return_value="/usr/bin/codegraph"):
+    with patch(
+        "scanner.cli.appguardrail.shutil.which", return_value="/usr/bin/codegraph"
+    ):
         with pytest.raises(RuntimeError, match="not a directory"):
             _run_codegraph_index(tmp_path)
 
@@ -688,7 +697,9 @@ def test_cmd_scan_does_not_block_embedded_scanner_rule_fixtures(tmp_path, capsys
     assert "🔴 0 critical issues" in out
 
 
-def test_cmd_scan_single_file_keeps_scanner_fixture_context(tmp_path, monkeypatch, capsys):
+def test_cmd_scan_single_file_keeps_scanner_fixture_context(
+    tmp_path, monkeypatch, capsys
+):
     monkeypatch.chdir(tmp_path)
     scanner_cli = tmp_path / "scanner" / "cli"
     scanner_cli.mkdir(parents=True)
@@ -938,7 +949,10 @@ def test_cmd_init_unknown_tool(tmp_path, monkeypatch, capsys):
     assert excinfo.value.code == 1
     captured = capsys.readouterr()
     assert "Error: Unknown tool 'invalid-tool'" in captured.err
-    assert "Supported tools are auto, cursor, codex, copilot, claude-code, windsurf, lovable" in captured.err
+    assert (
+        "Supported tools are auto, cursor, codex, copilot, claude-code, windsurf, lovable"
+        in captured.err
+    )
 
 
 def test_cmd_init_supabase_stack(tmp_path, monkeypatch, capsys):
@@ -967,6 +981,7 @@ def test_sanitize_terminal_output():
     # Test non-strings
     assert _sanitize_terminal_output(None) is None
 
+
 def test_scan_file_insecure_deserialization(tmp_path):
     test_file = tmp_path / "unsafe.py"
     test_file.write_text(
@@ -981,7 +996,8 @@ def test_scan_file_insecure_deserialization(tmp_path):
     )
 
     findings = [
-        finding for finding in _scan_file(test_file, tmp_path)
+        finding
+        for finding in _scan_file(test_file, tmp_path)
         if finding["rule_id"] == "python-insecure-deserialization"
     ]
 
@@ -993,6 +1009,7 @@ def test_scan_file_insecure_deserialization(tmp_path):
     assert any("marshal.load" in finding["snippet"] for finding in findings)
     assert all("yaml.safe_load" not in finding["snippet"] for finding in findings)
 
+
 def test_cmd_init_checklist_skipped(tmp_path, monkeypatch, capsys):
     monkeypatch.chdir(tmp_path)
     checklist_file = tmp_path / "APPGUARDRAIL_CHECKLIST.md"
@@ -1004,3 +1021,25 @@ def test_cmd_init_checklist_skipped(tmp_path, monkeypatch, capsys):
     out = capsys.readouterr().out
     assert "Skipped (already configured):" in out
     assert "APPGUARDRAIL_CHECKLIST.md" in out
+
+
+def test_ssrf_risk_rule(tmp_path):
+    from scanner.cli.appguardrail import SCAN_RULES, _scan_file
+
+    test_file_js = tmp_path / "unsafe_fetch.js"
+    test_file_js.write_text("fetch(`https://api.example.com/user/${userId}`);\n")
+
+    test_file_py = tmp_path / "unsafe_requests.py"
+    test_file_py.write_text('requests.get(f"https://api.example.com/{user_id}")\n')
+
+    test_file_safe_js = tmp_path / "safe_fetch.js"
+    test_file_safe_js.write_text("fetch('https://api.example.com/users');\n")
+
+    findings_js = _scan_file(test_file_js, tmp_path)
+    assert any(f["rule_id"] == "ssrf-risk" for f in findings_js)
+
+    findings_py = _scan_file(test_file_py, tmp_path)
+    assert any(f["rule_id"] == "ssrf-risk" for f in findings_py)
+
+    findings_safe_js = _scan_file(test_file_safe_js, tmp_path)
+    assert not any(f["rule_id"] == "ssrf-risk" for f in findings_safe_js)