diff --git a/.jules/bolt.md b/.jules/bolt.md index 49b1916..8c9009f 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -47,3 +47,7 @@ ## 2026-07-02 - Remove `re.search` fast-path pre-check **Learning:** Python's `re.finditer` evaluates lazily by allocating a lightweight C-level `ScannerObject`. Using `re.search` as a fast-path pre-check before `re.finditer` is an anti-pattern that addresses a non-existent bottleneck and degrades performance for matched paths by evaluating the regex twice. **Action:** Do not use `re.search` before `re.finditer` for optimization purposes. + +## 2026-07-03 - Optimize order-preserving deduplication with `dict.fromkeys` +**Learning:** In Python, deduplicating while preserving insertion order by testing membership in a list (e.g. `if ref not in seen: seen.append(ref)`) costs O(N) per lookup, resulting in O(N^2) complexity overall. +**Action:** Use `dict.fromkeys(iterable)`, which leverages Python's highly optimized, insertion-ordered C dictionary implementation to deduplicate hashable items in O(N) total time while keeping first-seen order. diff --git a/appguardrail_core/rules.py b/appguardrail_core/rules.py index 87180a8..8a3074a 100644 --- a/appguardrail_core/rules.py +++ b/appguardrail_core/rules.py @@ -90,6 +90,12 @@ def as_dict(self) -> dict[str, Any]: def extract_public_references(message: str) -> tuple[str, ...]: """Extract OWASP, CWE, and CVE references already embedded in rule copy.""" + # ⚡ Bolt: Fast substring pre-filter to bypass expensive regex evaluation + # and match object allocations when the message definitely does not + # contain a public reference token. 
+ if message and not ("CWE-" in message or "OWASP " in message or "CVE-" in message): + return () + seen: list[str] = [] for match in REFERENCE_RE.finditer(message or ""): reference = " ".join(match.group(1).split()) @@ -140,9 +146,4 @@ def validate_rule_metadata(metadata: RuleMetadata | dict[str, Any]) -> list[str] def _merge_references(*groups: tuple[str, ...]) -> tuple[str, ...]: - merged: list[str] = [] - for group in groups: - for reference in group: - if reference and reference not in merged: - merged.append(reference) - return tuple(merged) + return tuple(dict.fromkeys(ref for group in groups for ref in group if ref))