Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .jules/bolt.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,7 @@
## 2026-07-02 - Remove `re.search` fast-path pre-check
**Learning:** Python's `re.finditer` evaluates lazily by allocating a lightweight C-level `ScannerObject`. Using `re.search` as a fast-path pre-check before `re.finditer` is an anti-pattern that addresses a non-existent bottleneck and degrades performance for matched paths by evaluating the regex twice.
**Action:** Do not use `re.search` before `re.finditer` for optimization purposes.

## 2026-07-03 - Defer expensive pathlib operations
**Learning:** `Path.is_dir()` and `Path.resolve()` trigger costly `stat()` system calls. Executing them unconditionally for every file scanned in a hot path causes significant overhead.
**Action:** Defer these calls using lazy evaluation (initializing to None and computing only when needed) to avoid overhead for files that don't match any rules.
128 changes: 64 additions & 64 deletions scanner/cli/appguardrail.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,32 +57,21 @@
sys.path.insert(0, str(Path(__file__).resolve().parents[2]))

from appguardrail_core.external import build_external_scan_plan
from appguardrail_core.findings import (
NON_BLOCKING_CONTEXTS,
is_deploy_blocking as core_is_deploy_blocking,
normalize_findings,
)
from appguardrail_core.language import (
LANGUAGE_EXTENSIONS,
detect_language_axes,
detect_stack_profile,
)
from appguardrail_core.org_bundle import (
OrgBundleError,
annotate_missing_pr_repositories,
gh_error_message,
gh_pr_list,
gh_repo_list,
load_json as load_org_json,
render_org_evidence,
write_bundle,
)
from appguardrail_core.reports import (
REPORT_TYPE_LABELS,
ReportContext,
render_report,
supported_report_types,
)
from appguardrail_core.findings import NON_BLOCKING_CONTEXTS
from appguardrail_core.findings import \
is_deploy_blocking as core_is_deploy_blocking
from appguardrail_core.findings import normalize_findings
from appguardrail_core.language import (LANGUAGE_EXTENSIONS,
detect_language_axes,
detect_stack_profile)
from appguardrail_core.org_bundle import (OrgBundleError,
annotate_missing_pr_repositories,
gh_error_message, gh_pr_list,
gh_repo_list)
from appguardrail_core.org_bundle import load_json as load_org_json
from appguardrail_core.org_bundle import render_org_evidence, write_bundle
from appguardrail_core.reports import (REPORT_TYPE_LABELS, ReportContext,
render_report, supported_report_types)
from appguardrail_core.rules import build_rule_metadata

__version__ = "0.1.1"
Expand Down Expand Up @@ -725,7 +714,7 @@
r"(?i)<a\b(?=[^>\n]*target\s*=\s*[\"']_blank[\"'])(?![^>\n]*rel\s*=\s*[\"'][^\"']*(?:noopener|noreferrer))[^>\n]*href\s*=\s*[\"']https?://"
),
"severity": "WARNING",
"message": "External target=_blank link is missing rel=\"noopener noreferrer\". Add rel attributes to prevent reverse tabnabbing. [OWASP A05:2021 - Security Misconfiguration]",
"message": 'External target=_blank link is missing rel="noopener noreferrer". Add rel attributes to prevent reverse tabnabbing. [OWASP A05:2021 - Security Misconfiguration]',
"extensions": [".html", ".htm"],
},
{
Expand Down Expand Up @@ -873,6 +862,7 @@
},
]


def _unquote_rule_scalar(value: str) -> str:
"""Return a simple YAML scalar value from the controlled rule files."""
value = value.strip()
Expand Down Expand Up @@ -1451,10 +1441,7 @@ def cmd_scan(args):
try:
findings.extend(_run_semgrep_scan(scan_path, semgrep_config))
except RuntimeError as exc:
if (
external_plan.semgrep.auto_selected
and not external_plan.semgrep.forced
):
if external_plan.semgrep.auto_selected and not external_plan.semgrep.forced:
print(f"⚠️ Skipping Semgrep auto integration: {exc}\n")
else:
print(f"❌ Error: {exc}", file=sys.stderr)
Expand Down Expand Up @@ -1576,8 +1563,7 @@ def cmd_report(args):
or "Application source, configuration, and security workflow evidence.",
client_name=getattr(args, "client_name", None) or "n/a",
reviewer=getattr(args, "reviewer", None) or "AppGuardrail",
engagement_type=getattr(args, "engagement_type", None)
or "Pre-launch review",
engagement_type=getattr(args, "engagement_type", None) or "Pre-launch review",
based_on=getattr(args, "based_on", None) or "AppGuardrail findings JSON",
)
report = render_report(report_type, findings, context)
Expand Down Expand Up @@ -1614,11 +1600,13 @@ def cmd_org_bundle(args):
prs, collection_warnings = gh_pr_list(owner, repos, per_repo_pr_limit)
if prs_repository:
prs = annotate_missing_pr_repositories(prs, prs_repository)
generated_at, report, evidence_payload, inventory, pr_summary = render_org_evidence(
repos,
prs,
active_repository_target=active_repository_target,
generated_at=getattr(args, "generated_at", None),
generated_at, report, evidence_payload, inventory, pr_summary = (
render_org_evidence(
repos,
prs,
active_repository_target=active_repository_target,
generated_at=getattr(args, "generated_at", None),
)
)
manifest = write_bundle(
bundle_dir,
Expand All @@ -1643,7 +1631,9 @@ def cmd_org_bundle(args):
)
return 1
except subprocess.CalledProcessError as exc:
print(f"❌ Error: GitHub command failed: {gh_error_message(exc)}", file=sys.stderr)
print(
f"❌ Error: GitHub command failed: {gh_error_message(exc)}", file=sys.stderr
)
print(
"💡 Hint: Retry later or provide --repos-json and --prs-json.",
file=sys.stderr,
Expand Down Expand Up @@ -2298,20 +2288,22 @@ def _run_semgrep_scan(scan_path: Path, config: str = "auto"):

config = config or "auto"
try:
process = subprocess.run( # noqa: S603 - Semgrep path resolved with shutil.which
[
semgrep,
"scan",
"--config",
config,
"--json",
str(scan_path),
],
shell=False,
capture_output=True,
text=True,
check=False,
timeout=600,
process = (
subprocess.run( # noqa: S603 - Semgrep path resolved with shutil.which
[
semgrep,
"scan",
"--config",
config,
"--json",
str(scan_path),
],
shell=False,
capture_output=True,
text=True,
check=False,
timeout=600,
)
)
except subprocess.TimeoutExpired as exc:
raise RuntimeError("Semgrep scan timed out.") from exc
Expand Down Expand Up @@ -2378,13 +2370,15 @@ def _run_zap_baseline(target_url: str):
with tempfile.TemporaryDirectory() as tmpdir:
report_path = Path(tmpdir) / "zap-baseline.json"
try:
process = subprocess.run( # noqa: S603 - ZAP path resolved with shutil.which
[zap, "-t", target_url, "-J", str(report_path), "-I"],
shell=False,
capture_output=True,
text=True,
check=False,
timeout=900,
process = (
subprocess.run( # noqa: S603 - ZAP path resolved with shutil.which
[zap, "-t", target_url, "-J", str(report_path), "-I"],
shell=False,
capture_output=True,
text=True,
check=False,
timeout=900,
)
)
except subprocess.TimeoutExpired as exc:
raise RuntimeError("ZAP baseline scan timed out.") from exc
Expand Down Expand Up @@ -2474,7 +2468,7 @@ def _scan_file(file_path: Path, base_path: Path):
# ⚡ Bolt: Resolve the base path used by relative_to lazily, at most once per file.
# Path.is_dir() and Path.resolve() invoke stat() system calls, so the value starts
# as None and is only computed when a path filter or a match actually needs it.
resolved_base_path = base_path if base_path.is_dir() else Path(".").resolve()
resolved_base_path = None

# ⚡ Bolt: Optimize stat calls by using os.lstat instead of Path objects
# Impact: Combines symlink, file type, and size checks into a single stat call
Expand Down Expand Up @@ -2521,6 +2515,10 @@ def _scan_file(file_path: Path, base_path: Path):
) in applicable_rules:
if include_paths or exclude_paths:
if rel_path_for_filters is None:
if resolved_base_path is None:
resolved_base_path = (
base_path if base_path.is_dir() else Path(".").resolve()
)
try:
rel_path = file_path.relative_to(resolved_base_path)
except ValueError:
Expand All @@ -2540,6 +2538,10 @@ def _scan_file(file_path: Path, base_path: Path):

for match in finditer(content):
if rel_path_str is None:
if resolved_base_path is None:
resolved_base_path = (
base_path if base_path.is_dir() else Path(".").resolve()
)
try:
rel_path = file_path.relative_to(resolved_base_path)
except ValueError:
Expand Down Expand Up @@ -2817,9 +2819,7 @@ def add_report_arguments(parser):
)
parser.add_argument("--app-name", default=None, help="Application name")
parser.add_argument("--repository", default=None, help="Repository name")
parser.add_argument(
"--commit", default=None, help="Commit SHA or version"
)
parser.add_argument("--commit", default=None, help="Commit SHA or version")
parser.add_argument(
"--generated-at", default=None, help="Report timestamp in ISO-8601 form"
)
Expand Down