ContextualWisdomLab · seonghobae · Jul 1, 2026 · github-actions · Jul 1, 2026
diff --git a/.jules/bolt.md b/.jules/bolt.md
@@ -43,3 +43,6 @@
 ## 2026-07-01 - O(N*M) Line Counting Optimization
 **Learning:** In `scanner/cli/appguardrail.py`, the `_scan_file` loop calculates line numbers by calling `count_newlines("\n", 0, start_idx)` for *every* regex match. In files with many matches, this repeatedly scans the string from the beginning, resulting in O(N*M) performance (where N is file length and M is matches). This is a massive bottleneck.
 **Action:** Since `re.finditer` yields matches strictly in order, always calculate line numbers progressively using a tracking variable `current_line` and `current_pos`. Update `current_line += count_newlines("\n", current_pos, start_idx)`. This makes the line calculation strictly O(N), bringing up to a 15x speedup for files with many hits.
+## 2026-07-02 - Deferring base_path.is_dir() and Path(".").resolve()
+**Learning:** Evaluating `base_path.is_dir()` and `Path(".").resolve()` invokes expensive synchronous `stat()` system calls. Computing the resolved base path eagerly at the top of a hot path (such as `_scan_file`) adds pure overhead to every scanned file, even though the vast majority of files contain no vulnerabilities and never need that value.
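+**Action:** Always initialize such a value (here `resolved_base_path`) to `None` and evaluate `base_path.is_dir()` and `Path(".").resolve()` lazily, only at the first point where the result is actually needed (e.g. when computing a relative path for path filters or for a match finding); then reuse the computed value to avoid unnecessary I/O.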
diff --git a/scanner/cli/appguardrail.py b/scanner/cli/appguardrail.py
@@ -1873,7 +1873,9 @@ def _run_bandit_scan(scan_path: Path):
 
     if process.returncode not in {0, 1}:
         detail = (process.stderr or process.stdout).strip().splitlines()
-        raise RuntimeError("Bandit scan failed" + (f": {detail[-1]}" if detail else "."))
+        raise RuntimeError(
+            "Bandit scan failed" + (f": {detail[-1]}" if detail else ".")
+        )
 
     try:
         report = json.loads(process.stdout or "{}")
@@ -1972,9 +1974,7 @@ def _semgrep_findings(report: dict, base_path: Path):
     for item in report.get("results") or []:
         extra = item.get("extra") or {}
         start = item.get("start") or {}
-        path = _sanitize_terminal_output(
-            _trivy_target(item.get("path", ""), base_path)
-        )
+        path = _sanitize_terminal_output(_trivy_target(item.get("path", ""), base_path))
         check_id = item.get("check_id") or "semgrep"
         findings.append(
             _build_finding(
@@ -2171,11 +2171,6 @@ def _scan_file(file_path: Path, base_path: Path):
     """Scan a single file and return a list of findings."""
     findings = []
 
-    # ⚡ Bolt: Hoist expensive relative_to base_path resolution outside of loops.
-    # Path.is_dir() and Path.resolve() invoke stat() system calls. Doing this inside
-    # the finding iteration loop for every match was causing massive I/O overhead.
-    resolved_base_path = base_path if base_path.is_dir() else Path(".").resolve()
-
     # ⚡ Bolt: Optimize stat calls by using os.lstat instead of Path objects
     # Impact: Combines symlink, file type, and size checks into a single stat call
     try:
@@ -2198,6 +2193,8 @@ def _scan_file(file_path: Path, base_path: Path):
     # ⚡ Bolt: Defer expensive Pathlib operations (like relative_to) and string
     # sanitization until a match is actually found. This avoids significant overhead
     # for the vast majority of files that have no vulnerabilities.
+    # We also defer `base_path.is_dir()` and `Path(".").resolve()` which invoke `stat()`.
+    resolved_base_path = None
     rel_path_str = None
     rel_path_for_filters = None
     build_finding = _build_finding
@@ -2223,6 +2220,12 @@ def _scan_file(file_path: Path, base_path: Path):
                 if include_paths or exclude_paths:
                     if rel_path_for_filters is None:
                         try:
+                            if resolved_base_path is None:
+                                resolved_base_path = (
+                                    base_path
+                                    if base_path.is_dir()
+                                    else Path(".").resolve()
+                                )
                             rel_path = file_path.relative_to(resolved_base_path)
                         except ValueError:
                             rel_path = (
@@ -2246,6 +2249,12 @@ def _scan_file(file_path: Path, base_path: Path):
                 for match in finditer(content):
                     if rel_path_str is None:
                         try:
+                            if resolved_base_path is None:
+                                resolved_base_path = (
+                                    base_path
+                                    if base_path.is_dir()
+                                    else Path(".").resolve()
+                                )
                             rel_path = file_path.relative_to(resolved_base_path)
                         except ValueError:
                             rel_path = (