From c8c26a38c900c611f9061e35c39c5af1d5419b8a Mon Sep 17 00:00:00 2001
From: Chris Campbell <91504044+texasbe2trill@users.noreply.github.com>
Date: Thu, 16 Apr 2026 18:54:08 -0500
Subject: [PATCH] feat: Add optional --risk-scoring flag to scan command

Adds a --risk-scoring flag that annotates each detected secret with
risk_score, risk_level, risk_reasons, and context_tags based on file
path and secret type heuristics. Scores are higher for secrets found
in deployment/infrastructure paths and lower for test/example paths.

The feature is fully opt-in and does not affect output when the flag
is not used. Risk metadata is preserved across baseline updates.

Signed-off-by: Chris Campbell <91504044+texasbe2trill@users.noreply.github.com>
---
Reviewer notes (this section is ignored by git am):
* Restored the line structure of this patch, which had been collapsed
  onto a handful of lines.
* Directory patterns in risk_scoring.py now match whole path components
  only, and Windows path separators are normalized before matching.
  Regression tests were added for both.
* Hunk sizes and the diffstat were updated to match. The blob ids on the
  "index" lines of the two new files predate these changes.
* Indentation of unchanged context lines was reconstructed from the code
  structure; confirm with `git apply --check` before applying.

 detect_secrets/core/baseline.py         |   4 +
 detect_secrets/core/potential_secret.py |  10 ++
 detect_secrets/core/risk_scoring.py     | 123 ++++++++++++++++
 detect_secrets/core/usage.py            |  11 +-
 detect_secrets/main.py                  |  10 +-
 tests/core/risk_scoring_test.py         | 180 ++++++++++++++++++++++++
 6 files changed, 335 insertions(+), 3 deletions(-)
 create mode 100644 detect_secrets/core/risk_scoring.py
 create mode 100644 tests/core/risk_scoring_test.py

diff --git a/detect_secrets/core/baseline.py b/detect_secrets/core/baseline.py
index 41692f266..aeefa0340 100644
--- a/detect_secrets/core/baseline.py
+++ b/detect_secrets/core/baseline.py
@@ -324,6 +324,10 @@ def merge_results(old_results, new_results):
             if 'is_secret' in old_secret and 'is_secret' not in new_secret:
                 new_secret['is_secret'] = old_secret['is_secret']
 
+            for key in ('risk_score', 'risk_level', 'risk_reasons', 'context_tags'):
+                if key in old_secret and key not in new_secret:
+                    new_secret[key] = old_secret[key]
+
     return new_results
 
 
diff --git a/detect_secrets/core/potential_secret.py b/detect_secrets/core/potential_secret.py
index 6b6abc596..74abc0ae2 100644
--- a/detect_secrets/core/potential_secret.py
+++ b/detect_secrets/core/potential_secret.py
@@ -59,6 +59,10 @@ def __init__(
         self.is_verified = is_verified
         self.verified_result = verified_result
         self.other_factors = {}
+        self.risk_score = None
+        self.risk_level = None
+        self.risk_reasons = None
+        self.context_tags = None
 
         # NOTE: Originally, we never wanted to keep the secret value in memory,
         #       after finding it in the codebase. However, to support verifiable
@@ -110,6 +114,12 @@ def json(self):
         if self.other_factors:
             attributes['other_factors'] = self.other_factors
 
+        if self.risk_score is not None:
+            attributes['risk_score'] = self.risk_score
+            attributes['risk_level'] = self.risk_level
+            attributes['risk_reasons'] = self.risk_reasons
+            attributes['context_tags'] = self.context_tags
+
         return attributes
 
     def __eq__(self, other):
diff --git a/detect_secrets/core/risk_scoring.py b/detect_secrets/core/risk_scoring.py
new file mode 100644
index 000000000..9730f0d40
--- /dev/null
+++ b/detect_secrets/core/risk_scoring.py
@@ -0,0 +1,123 @@
+import re
+
+
+BASE_SCORE = 50
+
+# Each entry is (regex, score adjustment, context tag). Directory patterns are
+# anchored with (?:^|/) so they only match whole path components; otherwise
+# 'nonprod/' would be scored as 'prod/' and 'latest/' as 'test/'.
+HIGH_RISK_PATH_PATTERNS = [
+    (r'(?:^|/)\.github/workflows/', 15, 'ci-pipeline'),
+    (r'(?:^|/)terraform/', 15, 'infrastructure'),
+    (r'(?:^|/)helm/', 15, 'infrastructure'),
+    (r'(?:^|/)k8s/', 15, 'infrastructure'),
+    (r'(?:^|/)deploy/', 10, 'deployment'),
+    (r'(?:^|/)prod/', 15, 'production'),
+    (r'\.env$', 10, 'dotenv'),
+    (r'(?:^|/)values\.yaml$', 10, 'helm-values'),
+    (r'docker-compose', 10, 'docker'),
+    (r'Dockerfile', 5, 'docker'),
+]
+
+# Same (regex, score adjustment, context tag) layout and anchoring as above.
+LOW_RISK_PATH_PATTERNS = [
+    (r'(?:^|/)tests?/', -15, 'test'),
+    (r'(?:^|/)test_data/', -15, 'test'),
+    (r'(?:^|/)examples?/', -15, 'example'),
+    (r'(?:^|/)docs?/', -10, 'documentation'),
+    (r'(?:^|/)fixtures?/', -15, 'test-fixture'),
+    (r'mock', -10, 'mock'),
+    (r'sample', -10, 'sample'),
+]
+
+HIGH_RISK_SECRET_TYPES = [
+    'Private Key',
+    'AWS Access Key',
+    'IBM Cloud IAM Key',
+    'IBM COS HMAC Credentials',
+    'Slack Token',
+    'Stripe Access Key',
+    'Twilio API Key',
+]
+
+SENSITIVE_KEYWORD_RE = re.compile(
+    r'password|passwd|token|secret|apikey|api_key|client_secret|private_key',
+    re.IGNORECASE,
+)
+
+
+def compute_risk(secret):
+    """Compute risk metadata for a PotentialSecret.
+
+    The score starts at BASE_SCORE, is adjusted by the path, secret type,
+    keyword and verification heuristics, and is then clamped to 0-100.
+
+    :type secret: detect_secrets.core.potential_secret.PotentialSecret
+    :rtype: dict with keys: risk_score, risk_level, risk_reasons, context_tags
+    """
+    score = BASE_SCORE
+    reasons = []
+    tags = set()
+
+    # Normalize Windows separators so the '/'-based patterns still apply.
+    filename = (secret.filename or '').replace('\\', '/')
+
+    for pattern, adjustment, tag in HIGH_RISK_PATH_PATTERNS:
+        if re.search(pattern, filename):
+            score += adjustment
+            reasons.append('high-risk path: {}'.format(tag))
+            tags.add(tag)
+
+    for pattern, adjustment, tag in LOW_RISK_PATH_PATTERNS:
+        if re.search(pattern, filename):
+            score += adjustment
+            reasons.append('low-risk path: {}'.format(tag))
+            tags.add(tag)
+
+    if secret.type in HIGH_RISK_SECRET_TYPES:
+        score += 10
+        reasons.append('high-risk secret type: {}'.format(secret.type))
+
+    if SENSITIVE_KEYWORD_RE.search(filename):
+        score += 5
+        reasons.append('sensitive keyword in path')
+
+    if secret.is_verified:
+        score += 15
+        reasons.append('verified secret')
+        tags.add('verified')
+
+    score = max(0, min(100, score))
+
+    return {
+        'risk_score': score,
+        'risk_level': _score_to_level(score),
+        'risk_reasons': reasons,
+        'context_tags': sorted(tags),
+    }
+
+
+def _score_to_level(score):
+    """Map a score to 'LOW' (below 40), 'MEDIUM' (40-74) or 'HIGH' (75 and up)."""
+    if score >= 75:
+        return 'HIGH'
+    elif score >= 40:
+        return 'MEDIUM'
+    return 'LOW'
+
+
+def annotate_secrets(secrets_collection):
+    """Apply risk scoring to all secrets in a SecretsCollection.
+
+    Each secret's risk_score, risk_level, risk_reasons and context_tags
+    attributes are set in place.
+
+    :type secrets_collection: detect_secrets.core.secrets_collection.SecretsCollection
+    """
+    for filename in secrets_collection.data:
+        for secret in secrets_collection.data[filename]:
+            risk = compute_risk(secret)
+            secret.risk_score = risk['risk_score']
+            secret.risk_level = risk['risk_level']
+            secret.risk_reasons = risk['risk_reasons']
+            secret.context_tags = risk['context_tags']
diff --git a/detect_secrets/core/usage.py b/detect_secrets/core/usage.py
index 5d532f3d0..ab608adcf 100644
--- a/detect_secrets/core/usage.py
+++ b/detect_secrets/core/usage.py
@@ -205,7 +205,8 @@ def add_arguments(self):
         self._add_initialize_baseline_argument()\
             ._add_adhoc_scanning_argument()\
             ._add_output_raw_argument()\
-            ._add_suppress_unscannable_file_warnings()
+            ._add_suppress_unscannable_file_warnings()\
+            ._add_risk_scoring_argument()
 
         PluginOptions(self.parser).add_arguments()
 
@@ -289,6 +290,14 @@ def _add_suppress_unscannable_file_warnings(self):
         add_suppress_unscannable_file_warnings(self.parser)
         return self
 
+    def _add_risk_scoring_argument(self):
+        self.parser.add_argument(
+            '--risk-scoring',
+            action='store_true',
+            help='Add risk scoring metadata to each detected secret.',
+        )
+        return self
+
 
 class AuditOptions:
     def __init__(self, subparser):
diff --git a/detect_secrets/main.py b/detect_secrets/main.py
index 7bbc8f8f6..8178cacfb 100644
--- a/detect_secrets/main.py
+++ b/detect_secrets/main.py
@@ -6,6 +6,7 @@
 from detect_secrets.core.common import write_baseline_to_file
 from detect_secrets.core.log import log
 from detect_secrets.core.report import report
+from detect_secrets.core.risk_scoring import annotate_secrets
 from detect_secrets.core.secrets_collection import SecretsCollection
 from detect_secrets.core.usage import ParserBuilder
 from detect_secrets.plugins.common import initialize
@@ -178,7 +179,7 @@ def _perform_scan(args, plugins, automaton, word_list_hash):
     if args.import_filename:
         _add_baseline_to_exclude_files(args)
 
-    new_baseline = baseline.initialize(
+    new_secrets_collection = baseline.initialize(
         plugins=plugins,
         exclude_files_regex=args.exclude_files,
         exclude_lines_regex=args.exclude_lines,
@@ -189,7 +190,12 @@ def _perform_scan(args, plugins, automaton, word_list_hash):
         output_raw=args.output_raw,
         output_verified_false=args.output_verified_false,
         suppress_unscannable_file_warnings=args.suppress_unscannable_file_warnings,
-    ).format_for_baseline_output()
+    )
+
+    if getattr(args, 'risk_scoring', False):
+        annotate_secrets(new_secrets_collection)
+
+    new_baseline = new_secrets_collection.format_for_baseline_output()
 
     if old_baseline:
         new_baseline = baseline.merge_baseline(
diff --git a/tests/core/risk_scoring_test.py b/tests/core/risk_scoring_test.py
new file mode 100644
index 000000000..4f6b05f09
--- /dev/null
+++ b/tests/core/risk_scoring_test.py
@@ -0,0 +1,180 @@
+import pytest
+
+from detect_secrets.core.potential_secret import PotentialSecret
+from detect_secrets.core.risk_scoring import annotate_secrets
+from detect_secrets.core.risk_scoring import compute_risk
+from testing.factories import potential_secret_factory
+from testing.factories import secrets_collection_factory
+
+
+class TestComputeRisk:
+
+    def test_baseline_score(self):
+        secret = potential_secret_factory(filename='src/app.py')
+        result = compute_risk(secret)
+        assert result['risk_score'] == 50
+        assert result['risk_level'] == 'MEDIUM'
+
+    @pytest.mark.parametrize(
+        'filename, min_score',
+        [
+            ('deploy/config.yaml', 60),
+            ('prod/secrets.env', 65),
+            ('terraform/main.tf', 65),
+            ('.github/workflows/ci.yml', 65),
+            ('helm/values.yaml', 65),
+            ('k8s/deployment.yaml', 65),
+        ],
+    )
+    def test_high_risk_paths_increase_score(self, filename, min_score):
+        secret = potential_secret_factory(filename=filename)
+        result = compute_risk(secret)
+        assert result['risk_score'] >= min_score
+        assert len(result['risk_reasons']) > 0
+
+    @pytest.mark.parametrize(
+        'filename, max_score',
+        [
+            ('tests/test_app.py', 40),
+            ('test/test_app.py', 40),
+            ('docs/example.md', 45),
+            ('examples/demo.py', 40),
+            ('fixtures/data.json', 40),
+        ],
+    )
+    def test_low_risk_paths_decrease_score(self, filename, max_score):
+        secret = potential_secret_factory(filename=filename)
+        result = compute_risk(secret)
+        assert result['risk_score'] <= max_score
+
+    def test_high_risk_secret_type(self):
+        secret = potential_secret_factory(
+            type_='Private Key',
+            filename='src/app.py',
+        )
+        result = compute_risk(secret)
+        assert result['risk_score'] == 60
+        assert 'high-risk secret type: Private Key' in result['risk_reasons']
+
+    def test_verified_secret_increases_score(self):
+        secret = PotentialSecret(
+            'type', 'src/app.py', 'secret',
+            lineno=1, is_verified=True,
+        )
+        result = compute_risk(secret)
+        assert result['risk_score'] == 65
+        assert 'verified' in result['context_tags']
+
+    def test_sensitive_keyword_in_path(self):
+        secret = potential_secret_factory(filename='config/password.ini')
+        result = compute_risk(secret)
+        assert result['risk_score'] == 55
+        assert 'sensitive keyword in path' in result['risk_reasons']
+
+    def test_score_clamped_to_range(self):
+        secret = potential_secret_factory(
+            type_='Private Key',
+            filename='prod/deploy/terraform/.env',
+        )
+        result = compute_risk(secret)
+        assert 0 <= result['risk_score'] <= 100
+
+    def test_risk_level_high(self):
+        secret = potential_secret_factory(
+            type_='Private Key',
+            filename='prod/deploy/values.yaml',
+        )
+        result = compute_risk(secret)
+        assert result['risk_level'] == 'HIGH'
+
+    def test_risk_level_low(self):
+        secret = potential_secret_factory(filename='tests/fixtures/mock.py')
+        result = compute_risk(secret)
+        assert result['risk_level'] == 'LOW'
+
+    def test_context_tags_populated(self):
+        secret = potential_secret_factory(filename='deploy/config.yaml')
+        result = compute_risk(secret)
+        assert 'deployment' in result['context_tags']
+
+    def test_multiple_tags(self):
+        secret = potential_secret_factory(filename='prod/deploy/config.yaml')
+        result = compute_risk(secret)
+        assert 'production' in result['context_tags']
+        assert 'deployment' in result['context_tags']
+
+    @pytest.mark.parametrize(
+        'filename',
+        [
+            'src/latest/app.py',
+            'nonprod/app.py',
+            'overwhelm/app.py',
+            'mydocs/app.py',
+        ],
+    )
+    def test_path_patterns_match_whole_components(self, filename):
+        secret = potential_secret_factory(filename=filename)
+        result = compute_risk(secret)
+        assert result['risk_score'] == 50
+        assert result['context_tags'] == []
+
+    def test_windows_path_separators(self):
+        secret = potential_secret_factory(filename='prod\\deploy\\app.py')
+        result = compute_risk(secret)
+        assert 'production' in result['context_tags']
+        assert 'deployment' in result['context_tags']
+
+
+class TestAnnotateSecrets:
+
+    def test_annotates_collection(self):
+        collection = secrets_collection_factory(
+            secrets=[
+                {'filename': 'deploy/app.py', 'secret': 'abc'},
+                {'filename': 'tests/test.py', 'secret': 'def'},
+            ],
+        )
+        annotate_secrets(collection)
+
+        for filename in collection.data:
+            for secret in collection.data[filename]:
+                assert secret.risk_score is not None
+                assert secret.risk_level is not None
+                assert isinstance(secret.risk_reasons, list)
+                assert isinstance(secret.context_tags, list)
+
+    def test_annotated_secrets_appear_in_json(self):
+        collection = secrets_collection_factory(
+            secrets=[{'filename': 'prod/app.py', 'secret': 'abc'}],
+        )
+        annotate_secrets(collection)
+
+        for secret in collection.data['prod/app.py']:
+            output = secret.json()
+            assert 'risk_score' in output
+            assert 'risk_level' in output
+            assert 'risk_reasons' in output
+            assert 'context_tags' in output
+
+
+class TestRiskScoringOptIn:
+
+    def test_no_risk_metadata_by_default(self):
+        secret = potential_secret_factory()
+        output = secret.json()
+        assert 'risk_score' not in output
+        assert 'risk_level' not in output
+        assert 'risk_reasons' not in output
+        assert 'context_tags' not in output
+
+    def test_risk_metadata_present_after_annotation(self):
+        secret = potential_secret_factory(filename='deploy/app.py')
+        risk = compute_risk(secret)
+        secret.risk_score = risk['risk_score']
+        secret.risk_level = risk['risk_level']
+        secret.risk_reasons = risk['risk_reasons']
+        secret.context_tags = risk['context_tags']
+
+        output = secret.json()
+        assert output['risk_score'] >= 60
+        assert output['risk_level'] in ('LOW', 'MEDIUM', 'HIGH')