diff --git a/scripts/ci/noema_review_gate.py b/scripts/ci/noema_review_gate.py index 1e4661b7..39026eb8 100644 --- a/scripts/ci/noema_review_gate.py +++ b/scripts/ci/noema_review_gate.py @@ -33,18 +33,23 @@ RUNNING_STATES = {"QUEUED", "IN_PROGRESS", "PENDING", "REQUESTED", "WAITING", "EXPECTED"} MAX_DIFF_CHARS = 60000 +SENSITIVE_DATA_SCRUB_PATTERNS = ( + (re.compile(r'(bearer\s+)[^\s"\'\\]+', re.IGNORECASE), r'\1***'), + (re.compile(r'(token\s+)[^\s"\'\\]+', re.IGNORECASE), r'\1***'), + (re.compile(r'\b(?:github_pat_[A-Za-z0-9_]+|gh[pousr]_[A-Za-z0-9_]+)\b', re.IGNORECASE), '***'), + (re.compile(r'\bsk-[A-Za-z0-9_-]+'), '***'), + (re.compile(r'\bxox[baprs]-[A-Za-z0-9-]+'), '***'), + (re.compile(r'\bAKIA[0-9A-Z]{16}'), '***'), + (re.compile(r'((?:api[_-]?key|access[_-]?token|refresh[_-]?token|id[_-]?token|client[_-]?secret|password|passwd|secret)\s*[:=]\s*)["\']?[^"\'\s]+["\']?', re.IGNORECASE), r'\g<1>***'), +) + def scrub_sensitive_data(text: str | None) -> str | None: """Mask sensitive tokens in text to prevent secret leakage.""" if not text: return text - text = re.sub(r'(?i)(bearer\s+)[^\s"\'\\]+', r'\1***', text) - text = re.sub(r'(?i)(token\s+)[^\s"\'\\]+', r'\1***', text) - text = re.sub(r'(?i)\b(?:github_pat_[A-Za-z0-9_]+|gh[pousr]_[A-Za-z0-9_]+)\b', '***', text) - text = re.sub(r'\b(sk-[A-Za-z0-9_-]+)', '***', text) - text = re.sub(r'\b(xox[baprs]-[A-Za-z0-9-]+)', '***', text) - text = re.sub(r'\b(AKIA[0-9A-Z]{16})', '***', text) - text = re.sub(r'(?i)((?:api[_-]?key|access[_-]?token|refresh[_-]?token|id[_-]?token|client[_-]?secret|password|passwd|secret)\s*[:=]\s*)["\']?[^"\'\s]+["\']?', r'\1***', text) + for pattern, repl in SENSITIVE_DATA_SCRUB_PATTERNS: + text = pattern.sub(repl, text) return text