BugTraceAI · jordi-murgo · May 22, 2026 · May 22, 2026
diff --git a/bugtrace/agents/gospider_agent.py b/bugtrace/agents/gospider_agent.py
@@ -1,4 +1,4 @@
-from typing import List, Dict, Any, Set
+from typing import List, Dict, Any, Set, Optional
 from loguru import logger
 from bugtrace.tools.external import external_tools
 from bugtrace.core.ui import dashboard
@@ -29,12 +29,13 @@ class GoSpiderAgent(BaseAgent):
     IMPROVED 2026-01-30: Extract ALL testable parameters, not just URLs.
     """
 
-    def __init__(self, target: str, report_dir: Path, max_depth: int = 2, max_urls: int = 10, event_bus: Any = None):
+    def __init__(self, target: str, report_dir: Path, max_depth: int = 2, max_urls: int = 10, event_bus: Any = None, scan_ctx_id: Optional[str] = None):
         super().__init__("GoSpiderAgent", "URL Discovery", event_bus=event_bus, agent_id="gospider_agent")
         self.target = target
         self.report_dir = report_dir
         self.max_depth = max_depth
         self.max_urls = max_urls
+        self.scan_ctx_id = scan_ctx_id
         self.target_domain = urlparse(target).hostname.lower() if urlparse(target).hostname else ""
 
         # Load extension filters from config
@@ -73,11 +74,27 @@ def _should_analyze_url(self, url: str) -> bool:
 
     async def _discover_urls(self) -> List[str]:
         """Run GoSpider and fallback discovery if needed."""
+        cookies: List[Dict[str, str]] = []
+        auth_extra_headers: Dict[str, str] = {}
+        if self.scan_ctx_id:
+            from bugtrace.services.scan_context import get_scan_auth_headers
+            auth_headers = get_scan_auth_headers(self.scan_ctx_id)
+            if "Cookie" in auth_headers:
+                cookie_str = auth_headers["Cookie"]
+                cookies = [{"name": p.split("=")[0], "value": "=".join(p.split("=")[1:])}
+                           for p in cookie_str.split("; ") if "=" in p]
+                dashboard.log(f"[{self.name}] Using {len(cookies)} auth cookies for crawling", "INFO")
+            if "Authorization" in auth_headers:
+                auth_extra_headers["Authorization"] = auth_headers["Authorization"]
+                dashboard.log(f"[{self.name}] Using Authorization header for crawling", "INFO")
+
         # Pass max_urls to support early exit (optimization)
         gospider_urls = await external_tools.run_gospider(
             self.target, 
+            cookies=cookies,
             depth=self.max_depth,
-            max_urls=self.max_urls
+            max_urls=self.max_urls,
+            extra_headers=auth_extra_headers if auth_extra_headers else None,
         )
 
         # If GoSpider only returns 1 URL (the target itself), trigger fallback

diff --git a/bugtrace/api/routes/reports.py b/bugtrace/api/routes/reports.py
@@ -150,46 +150,82 @@ def _find_report_dir(scan_id: int) -> FilePath | None:
         with db.get_session() as session:
             from bugtrace.schemas.db_models import ScanTable, TargetTable
             scan = session.get(ScanTable, scan_id)
-            if not scan:
-                return None
-            target = session.get(TargetTable, scan.target_id)
-            if not target:
-                return None
-
-            # Pattern 0: Direct DB match (new v5.1 architecture)
-            if hasattr(scan, 'report_dir') and scan.report_dir:
-                db_dir = FilePath(scan.report_dir)
-                if db_dir.is_dir() and _has_report_files(db_dir):
-                    return db_dir
-
-            # Pattern 1: Pipeline-generated reports ({domain}_{timestamp})
-            from urllib.parse import urlparse
-            domain = urlparse(target.url).hostname or ""
-            scan_ts = scan.timestamp.strftime("%Y%m%d_%H%M%S")
-
-            # Priority 1a: Exact timestamp match
-            exact_match = report_base / f"{domain}_{scan_ts}"
-            if exact_match.is_dir() and _has_report_files(exact_match):
-                return exact_match
-
-            # Priority 1b: Fuzzy match (latest for domain)
-            matches = sorted(
-                report_base.glob(f"{domain}_*"),
-                key=lambda p: p.stat().st_mtime,
-                reverse=True,
-            )
-            for match in matches:
-                if _has_report_files(match):
-                    return match
-
-            # Pattern 2: API-generated reports (fallback)
-            api_dir = report_base / f"scan_{scan_id}"
-            if api_dir.is_dir() and _has_report_files(api_dir):
-                return api_dir
+            if scan:
+                target = session.get(TargetTable, scan.target_id)
+
+                # Pattern 0: Direct DB match (new v5.1 architecture)
+                if hasattr(scan, 'report_dir') and scan.report_dir:
+                    db_dir = FilePath(scan.report_dir)
+                    if db_dir.is_dir() and _has_report_files(db_dir):
+                        return db_dir
+
+                # Pattern 1: Pipeline-generated reports ({domain}_{timestamp})
+                from urllib.parse import urlparse
+                if target:
+                    domain = urlparse(target.url).hostname or ""
+                    scan_ts = scan.timestamp.strftime("%Y%m%d_%H%M%S")
+
+                    # Priority 1a: Exact timestamp match
+                    exact_match = report_base / f"{domain}_{scan_ts}"
+                    if exact_match.is_dir() and _has_report_files(exact_match):
+                        return exact_match
+
+                    # Priority 1b: Fuzzy match (latest for domain)
+                    matches = sorted(
+                        report_base.glob(f"{domain}_*"),
+                        key=lambda p: p.stat().st_mtime,
+                        reverse=True,
+                    )
+                    for match in matches:
+                        if _has_report_files(match):
+                            return match
+
+                # Pattern 2: API-generated reports (fallback)
+                api_dir = report_base / f"scan_{scan_id}"
+                if api_dir.is_dir() and _has_report_files(api_dir):
+                    return api_dir
+
+            # No scan record, or no pattern above matched - fall through to filesystem scan below
 
     except Exception as e:
         logger.warning(f"Error resolving report dir for scan {scan_id}: {e}")
 
+    # Filesystem fallback: scan "*_*" report dirs (newest first) for scan_id in findings JSON metadata
+    import json as _json
+    for report_dir in sorted(
+        report_base.glob("*_*"),
+        key=lambda p: p.stat().st_mtime,
+        reverse=True,
+    ):
+        if not report_dir.is_dir():
+            continue
+        if not _has_report_files(report_dir):
+            continue
+        vf = report_dir / "validated_findings.json"
+        if vf.is_file():
+            try:
+                data = _json.loads(vf.read_text())
+                if isinstance(data, dict):
+                    if data.get("scan_id") == scan_id:
+                        return report_dir
+                    meta = data.get("meta", {})
+                    if isinstance(meta, dict) and meta.get("scan_id") == scan_id:
+                        return report_dir
+            except Exception:
+                pass
+        rf = report_dir / "raw_findings.json"
+        if rf.is_file():
+            try:
+                data = _json.loads(rf.read_text())
+                if isinstance(data, dict):
+                    if data.get("scan_id") == scan_id:
+                        return report_dir
+                    meta = data.get("meta", {})
+                    if isinstance(meta, dict) and meta.get("scan_id") == scan_id:
+                        return report_dir
+            except Exception:
+                pass
+
     # Last resort: check scan_{id} without DB access
     api_dir = report_base / f"scan_{scan_id}"
     if api_dir.is_dir() and _has_report_files(api_dir):

diff --git a/bugtrace/api/routes/scans.py b/bugtrace/api/routes/scans.py
@@ -100,6 +100,7 @@ def _build_scan_options(request: CreateScanRequest) -> ScanOptions:
         scan_depth=request.scan_depth,
         auth_token=request.auth_token,
         auth=request.auth,
+        auth_format=request.auth_format,
         url_list=request.url_list,
     )
 

diff --git a/bugtrace/api/schemas.py b/bugtrace/api/schemas.py
@@ -34,6 +34,7 @@ class CreateScanRequest(BaseModel):
     param: Optional[str] = Field(default=None, description="Specific parameter to target")
     auth_token: Optional[str] = Field(default=None, description="Pre-authenticated Bearer token (Level 1)")
     auth: Optional[Dict[str, Any]] = Field(default=None, description="Auto-login credentials: {login_url, credentials: {email, password}} (Level 2)")
+    auth_format: Optional[str] = Field(default=None, description="Login format for Auth Level 2: json or form")
     url_list: Optional[List[str]] = Field(default=None, description="Pre-defined URL list (from URL list file or Swagger import)")
 
 

diff --git a/bugtrace/core/team.py b/bugtrace/core/team.py
@@ -1502,7 +1502,7 @@ async def _run_gospider(self, recon_dir) -> list:
         """Run GoSpider agent for URL discovery."""
         logger.info(f"Triggering GoSpiderAgent for {self.target}")
         self._v.emit("recon.gospider.started", {"target": self.target})
-        gospider = GoSpiderAgent(self.target, recon_dir, max_depth=self.max_depth, max_urls=self.max_urls)
+        gospider = GoSpiderAgent(self.target, recon_dir, max_depth=self.max_depth, max_urls=self.max_urls, scan_ctx_id=self.scan_context)
         urls_to_scan = await gospider.run()
         self._v.emit("recon.gospider.completed", {"urls_found": len(urls_to_scan)})
         logger.info(f"GoSpiderAgent finished. Found {len(urls_to_scan)} URLs")

diff --git a/bugtrace/services/scan_context.py b/bugtrace/services/scan_context.py
@@ -89,6 +89,7 @@ class ScanOptions(BaseModel):
     scan_depth: str = ""  # empty = use settings.SCAN_DEPTH default
     auth_token: Optional[str] = None  # Level 1: pre-authenticated Bearer token
     auth: Optional[Dict[str, Any]] = None  # Level 2: {login_url, credentials: {email, password}}
+    auth_format: Optional[str] = None  # "json" or "form". If None, defaults to "json" in scan_service
     url_list: Optional[List[str]] = None  # Pre-defined URL list (from file upload or Swagger import)