Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 20 additions & 3 deletions bugtrace/agents/gospider_agent.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List, Dict, Any, Set
from typing import List, Dict, Any, Set, Optional
from loguru import logger
from bugtrace.tools.external import external_tools
from bugtrace.core.ui import dashboard
Expand Down Expand Up @@ -29,12 +29,13 @@ class GoSpiderAgent(BaseAgent):
IMPROVED 2026-01-30: Extract ALL testable parameters, not just URLs.
"""

def __init__(self, target: str, report_dir: Path, max_depth: int = 2, max_urls: int = 10, event_bus: Any = None):
def __init__(self, target: str, report_dir: Path, max_depth: int = 2, max_urls: int = 10, event_bus: Any = None, scan_ctx_id: Optional[str] = None):
super().__init__("GoSpiderAgent", "URL Discovery", event_bus=event_bus, agent_id="gospider_agent")
self.target = target
self.report_dir = report_dir
self.max_depth = max_depth
self.max_urls = max_urls
self.scan_ctx_id = scan_ctx_id
self.target_domain = urlparse(target).hostname.lower() if urlparse(target).hostname else ""

# Load extension filters from config
Expand Down Expand Up @@ -73,11 +74,27 @@ def _should_analyze_url(self, url: str) -> bool:

async def _discover_urls(self) -> List[str]:
"""Run GoSpider and fallback discovery if needed."""
cookies: List[Dict[str, str]] = []
auth_extra_headers: Dict[str, str] = {}
if self.scan_ctx_id:
from bugtrace.services.scan_context import get_scan_auth_headers
auth_headers = get_scan_auth_headers(self.scan_ctx_id)
if "Cookie" in auth_headers:
cookie_str = auth_headers["Cookie"]
cookies = [{"name": p.split("=")[0], "value": "=".join(p.split("=")[1:])}
for p in cookie_str.split("; ") if "=" in p]
dashboard.log(f"[{self.name}] Using {len(cookies)} auth cookies for crawling", "INFO")
if "Authorization" in auth_headers:
auth_extra_headers["Authorization"] = auth_headers["Authorization"]
dashboard.log(f"[{self.name}] Using Authorization header for crawling", "INFO")

# Pass max_urls to support early exit (optimization)
gospider_urls = await external_tools.run_gospider(
self.target,
cookies=cookies,
depth=self.max_depth,
max_urls=self.max_urls
max_urls=self.max_urls,
extra_headers=auth_extra_headers if auth_extra_headers else None,
)

# If GoSpider only returns 1 URL (the target itself), trigger fallback
Expand Down
108 changes: 72 additions & 36 deletions bugtrace/api/routes/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,46 +150,82 @@ def _find_report_dir(scan_id: int) -> FilePath | None:
with db.get_session() as session:
from bugtrace.schemas.db_models import ScanTable, TargetTable
scan = session.get(ScanTable, scan_id)
if not scan:
return None
target = session.get(TargetTable, scan.target_id)
if not target:
return None

# Pattern 0: Direct DB match (new v5.1 architecture)
if hasattr(scan, 'report_dir') and scan.report_dir:
db_dir = FilePath(scan.report_dir)
if db_dir.is_dir() and _has_report_files(db_dir):
return db_dir

# Pattern 1: Pipeline-generated reports ({domain}_{timestamp})
from urllib.parse import urlparse
domain = urlparse(target.url).hostname or ""
scan_ts = scan.timestamp.strftime("%Y%m%d_%H%M%S")

# Priority 1a: Exact timestamp match
exact_match = report_base / f"{domain}_{scan_ts}"
if exact_match.is_dir() and _has_report_files(exact_match):
return exact_match

# Priority 1b: Fuzzy match (latest for domain)
matches = sorted(
report_base.glob(f"{domain}_*"),
key=lambda p: p.stat().st_mtime,
reverse=True,
)
for match in matches:
if _has_report_files(match):
return match

# Pattern 2: API-generated reports (fallback)
api_dir = report_base / f"scan_{scan_id}"
if api_dir.is_dir() and _has_report_files(api_dir):
return api_dir
if scan:
target = session.get(TargetTable, scan.target_id)

# Pattern 0: Direct DB match (new v5.1 architecture)
if hasattr(scan, 'report_dir') and scan.report_dir:
db_dir = FilePath(scan.report_dir)
if db_dir.is_dir() and _has_report_files(db_dir):
return db_dir

# Pattern 1: Pipeline-generated reports ({domain}_{timestamp})
from urllib.parse import urlparse
if target:
domain = urlparse(target.url).hostname or ""
scan_ts = scan.timestamp.strftime("%Y%m%d_%H%M%S")

# Priority 1a: Exact timestamp match
exact_match = report_base / f"{domain}_{scan_ts}"
if exact_match.is_dir() and _has_report_files(exact_match):
return exact_match

# Priority 1b: Fuzzy match (latest for domain)
matches = sorted(
report_base.glob(f"{domain}_*"),
key=lambda p: p.stat().st_mtime,
reverse=True,
)
for match in matches:
if _has_report_files(match):
return match

# Pattern 2: API-generated reports (fallback)
api_dir = report_base / f"scan_{scan_id}"
if api_dir.is_dir() and _has_report_files(api_dir):
return api_dir

# DB has no scan record - fall through to filesystem scan below

except Exception as e:
logger.warning(f"Error resolving report dir for scan {scan_id}: {e}")

# Filesystem fallback: scan ALL report dirs for scan_id in metadata
import json as _json
for report_dir in sorted(
report_base.glob("*_*"),
key=lambda p: p.stat().st_mtime,
reverse=True,
):
if not report_dir.is_dir():
continue
if not _has_report_files(report_dir):
continue
vf = report_dir / "validated_findings.json"
if vf.is_file():
try:
data = _json.loads(vf.read_text())
if isinstance(data, dict):
if data.get("scan_id") == scan_id:
return report_dir
meta = data.get("meta", {})
if isinstance(meta, dict) and meta.get("scan_id") == scan_id:
return report_dir
except Exception:
pass
rf = report_dir / "raw_findings.json"
if rf.is_file():
try:
data = _json.loads(rf.read_text())
if isinstance(data, dict):
if data.get("scan_id") == scan_id:
return report_dir
meta = data.get("meta", {})
if isinstance(meta, dict) and meta.get("scan_id") == scan_id:
return report_dir
except Exception:
pass

# Last resort: check scan_{id} without DB access
api_dir = report_base / f"scan_{scan_id}"
if api_dir.is_dir() and _has_report_files(api_dir):
Expand Down
1 change: 1 addition & 0 deletions bugtrace/api/routes/scans.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ def _build_scan_options(request: CreateScanRequest) -> ScanOptions:
scan_depth=request.scan_depth,
auth_token=request.auth_token,
auth=request.auth,
auth_format=request.auth_format,
url_list=request.url_list,
)

Expand Down
1 change: 1 addition & 0 deletions bugtrace/api/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class CreateScanRequest(BaseModel):
param: Optional[str] = Field(default=None, description="Specific parameter to target")
auth_token: Optional[str] = Field(default=None, description="Pre-authenticated Bearer token (Level 1)")
auth: Optional[Dict[str, Any]] = Field(default=None, description="Auto-login credentials: {login_url, credentials: {email, password}} (Level 2)")
auth_format: Optional[str] = Field(default=None, description="Login format for Auth Level 2: json or form")
url_list: Optional[List[str]] = Field(default=None, description="Pre-defined URL list (from URL list file or Swagger import)")


Expand Down
2 changes: 1 addition & 1 deletion bugtrace/core/team.py
Original file line number Diff line number Diff line change
Expand Up @@ -1502,7 +1502,7 @@ async def _run_gospider(self, recon_dir) -> list:
"""Run GoSpider agent for URL discovery."""
logger.info(f"Triggering GoSpiderAgent for {self.target}")
self._v.emit("recon.gospider.started", {"target": self.target})
gospider = GoSpiderAgent(self.target, recon_dir, max_depth=self.max_depth, max_urls=self.max_urls)
gospider = GoSpiderAgent(self.target, recon_dir, max_depth=self.max_depth, max_urls=self.max_urls, scan_ctx_id=self.scan_context)
urls_to_scan = await gospider.run()
self._v.emit("recon.gospider.completed", {"urls_found": len(urls_to_scan)})
logger.info(f"GoSpiderAgent finished. Found {len(urls_to_scan)} URLs")
Expand Down
1 change: 1 addition & 0 deletions bugtrace/services/scan_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ class ScanOptions(BaseModel):
scan_depth: str = "" # empty = use settings.SCAN_DEPTH default
auth_token: Optional[str] = None # Level 1: pre-authenticated Bearer token
auth: Optional[Dict[str, Any]] = None # Level 2: {login_url, credentials: {email, password}}
auth_format: Optional[str] = None # "json" or "form". If None, defaults to "json" in scan_service
url_list: Optional[List[str]] = None # Pre-defined URL list (from file upload or Swagger import)


Expand Down
Loading