Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions detect_secrets/core/baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,10 @@ def merge_results(old_results, new_results):
if 'is_secret' in old_secret and 'is_secret' not in new_secret:
new_secret['is_secret'] = old_secret['is_secret']

for key in ('risk_score', 'risk_level', 'risk_reasons', 'context_tags'):
if key in old_secret and key not in new_secret:
new_secret[key] = old_secret[key]

return new_results


Expand Down
10 changes: 10 additions & 0 deletions detect_secrets/core/potential_secret.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ def __init__(
self.is_verified = is_verified
self.verified_result = verified_result
self.other_factors = {}
self.risk_score = None
self.risk_level = None
self.risk_reasons = None
self.context_tags = None

# NOTE: Originally, we never wanted to keep the secret value in memory,
# after finding it in the codebase. However, to support verifiable
Expand Down Expand Up @@ -110,6 +114,12 @@ def json(self):
if self.other_factors:
attributes['other_factors'] = self.other_factors

if self.risk_score is not None:
attributes['risk_score'] = self.risk_score
attributes['risk_level'] = self.risk_level
attributes['risk_reasons'] = self.risk_reasons
attributes['context_tags'] = self.context_tags

return attributes

def __eq__(self, other):
Expand Down
111 changes: 111 additions & 0 deletions detect_secrets/core/risk_scoring.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import re


# Score every secret starts from before any adjustment is applied.
BASE_SCORE = 50

# (regex, score adjustment, context tag) triples that are matched against the
# secret's file path with ``re.search``.
#
# Directory patterns carry a ``(?<![A-Za-z0-9])`` lookbehind so they only match
# at the start of a path segment or after a separator such as ``_`` or ``-``.
# Without it, ``prod/`` would also match ``reprod/`` and ``tests?/`` would
# match ``latest/`` or ``contests/``.
HIGH_RISK_PATH_PATTERNS = [
    (r'(?<![A-Za-z0-9])\.github/workflows/', 15, 'ci-pipeline'),
    (r'(?<![A-Za-z0-9])terraform/', 15, 'infrastructure'),
    (r'(?<![A-Za-z0-9])helm/', 15, 'infrastructure'),
    (r'(?<![A-Za-z0-9])k8s/', 15, 'infrastructure'),
    (r'(?<![A-Za-z0-9])deploy/', 10, 'deployment'),
    (r'(?<![A-Za-z0-9])prod/', 15, 'production'),
    (r'\.env$', 10, 'dotenv'),
    (r'values\.yaml$', 10, 'helm-values'),
    (r'docker-compose', 10, 'docker'),
    (r'Dockerfile', 5, 'docker'),
]

# Same triple layout as above, with negative adjustments. ``mock`` and
# ``sample`` are deliberately left as plain substring matches.
LOW_RISK_PATH_PATTERNS = [
    (r'(?<![A-Za-z0-9])tests?/', -15, 'test'),
    (r'(?<![A-Za-z0-9])test_data/', -15, 'test'),
    (r'(?<![A-Za-z0-9])examples?/', -15, 'example'),
    (r'(?<![A-Za-z0-9])docs?/', -10, 'documentation'),
    (r'(?<![A-Za-z0-9])fixtures?/', -15, 'test-fixture'),
    (r'mock', -10, 'mock'),
    (r'sample', -10, 'sample'),
]

# Values of ``secret.type`` that raise the score.
HIGH_RISK_SECRET_TYPES = [
    'Private Key',
    'AWS Access Key',
    'IBM Cloud IAM Key',
    'IBM COS HMAC Credentials',
    'Slack Token',
    'Stripe Access Key',
    'Twilio API Key',
]

# Case-insensitive keywords searched for anywhere in the file path.
SENSITIVE_KEYWORD_RE = re.compile(
    r'password|passwd|token|secret|apikey|api_key|client_secret|private_key',
    re.IGNORECASE,
)


def compute_risk(secret):
    """Derive a risk score and its supporting metadata for one secret.

    The score starts at ``BASE_SCORE`` and is adjusted for every high- and
    low-risk path pattern found in ``secret.filename``, for a secret type
    listed in ``HIGH_RISK_SECRET_TYPES`` (+10), for a sensitive keyword in
    the path (+5) and for a verified secret (+15). The total is then clamped
    to the 0-100 range.

    :type secret: detect_secrets.core.potential_secret.PotentialSecret
    :rtype: dict
    :returns: dict with keys ``risk_score``, ``risk_level``,
        ``risk_reasons`` (in evaluation order) and ``context_tags``
        (sorted list).
    """
    # A missing filename is treated as an empty path, so no pattern matches.
    path = secret.filename or ''

    total = BASE_SCORE
    why = []
    seen_tags = set()

    # High-risk patterns are evaluated before low-risk ones, which fixes the
    # order of the emitted reasons.
    pattern_groups = (
        (HIGH_RISK_PATH_PATTERNS, 'high-risk path: {}'),
        (LOW_RISK_PATH_PATTERNS, 'low-risk path: {}'),
    )
    for patterns, reason_template in pattern_groups:
        for regex, delta, tag in patterns:
            if not re.search(regex, path):
                continue
            total += delta
            why.append(reason_template.format(tag))
            seen_tags.add(tag)

    if secret.type in HIGH_RISK_SECRET_TYPES:
        total += 10
        why.append('high-risk secret type: {}'.format(secret.type))

    if SENSITIVE_KEYWORD_RE.search(path):
        total += 5
        why.append('sensitive keyword in path')

    if secret.is_verified:
        total += 15
        why.append('verified secret')
        seen_tags.add('verified')

    clamped = min(100, max(0, total))

    return {
        'risk_score': clamped,
        'risk_level': _score_to_level(clamped),
        'risk_reasons': why,
        'context_tags': sorted(seen_tags),
    }


def _score_to_level(score):
    """Map a numeric risk score to 'HIGH', 'MEDIUM' or 'LOW'.

    Scores of 75 and above are 'HIGH', scores of 40 and above are 'MEDIUM',
    and anything lower is 'LOW'.
    """
    # Thresholds are checked from highest to lowest; the first hit wins.
    for threshold, level in ((75, 'HIGH'), (40, 'MEDIUM')):
        if score >= threshold:
            return level
    return 'LOW'


def annotate_secrets(secrets_collection):
    """Attach risk metadata to every secret in a SecretsCollection, in place.

    For each secret found under ``secrets_collection.data``, the result of
    ``compute_risk`` is copied onto the secret's ``risk_score``,
    ``risk_level``, ``risk_reasons`` and ``context_tags`` attributes.

    :type secrets_collection: detect_secrets.core.secrets_collection.SecretsCollection
    """
    risk_fields = ('risk_score', 'risk_level', 'risk_reasons', 'context_tags')

    for path in secrets_collection.data:
        for found in secrets_collection.data[path]:
            computed = compute_risk(found)
            for field in risk_fields:
                setattr(found, field, computed[field])
11 changes: 10 additions & 1 deletion detect_secrets/core/usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,8 @@ def add_arguments(self):
self._add_initialize_baseline_argument()\
._add_adhoc_scanning_argument()\
._add_output_raw_argument()\
._add_suppress_unscannable_file_warnings()
._add_suppress_unscannable_file_warnings()\
._add_risk_scoring_argument()

PluginOptions(self.parser).add_arguments()

Expand Down Expand Up @@ -289,6 +290,14 @@ def _add_suppress_unscannable_file_warnings(self):
add_suppress_unscannable_file_warnings(self.parser)
return self

def _add_risk_scoring_argument(self):
    """Register the opt-in ``--risk-scoring`` boolean flag on the parser.

    :returns: self, so that calls can be chained.
    """
    flag_options = {
        'action': 'store_true',
        'help': 'Add risk scoring metadata to each detected secret.',
    }
    self.parser.add_argument('--risk-scoring', **flag_options)
    return self


class AuditOptions:
def __init__(self, subparser):
Expand Down
10 changes: 8 additions & 2 deletions detect_secrets/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from detect_secrets.core.common import write_baseline_to_file
from detect_secrets.core.log import log
from detect_secrets.core.report import report
from detect_secrets.core.risk_scoring import annotate_secrets
from detect_secrets.core.secrets_collection import SecretsCollection
from detect_secrets.core.usage import ParserBuilder
from detect_secrets.plugins.common import initialize
Expand Down Expand Up @@ -178,7 +179,7 @@ def _perform_scan(args, plugins, automaton, word_list_hash):
if args.import_filename:
_add_baseline_to_exclude_files(args)

new_baseline = baseline.initialize(
new_secrets_collection = baseline.initialize(
plugins=plugins,
exclude_files_regex=args.exclude_files,
exclude_lines_regex=args.exclude_lines,
Expand All @@ -189,7 +190,12 @@ def _perform_scan(args, plugins, automaton, word_list_hash):
output_raw=args.output_raw,
output_verified_false=args.output_verified_false,
suppress_unscannable_file_warnings=args.suppress_unscannable_file_warnings,
).format_for_baseline_output()
)

if getattr(args, 'risk_scoring', False):
annotate_secrets(new_secrets_collection)

new_baseline = new_secrets_collection.format_for_baseline_output()

if old_baseline:
new_baseline = baseline.merge_baseline(
Expand Down
159 changes: 159 additions & 0 deletions tests/core/risk_scoring_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
import pytest

from detect_secrets.core.potential_secret import PotentialSecret
from detect_secrets.core.risk_scoring import annotate_secrets
from detect_secrets.core.risk_scoring import compute_risk
from testing.factories import potential_secret_factory
from testing.factories import secrets_collection_factory


class TestComputeRisk:
    """Checks the score, level, reasons and tags returned by compute_risk."""

    def test_baseline_score(self):
        risk = compute_risk(potential_secret_factory(filename='src/app.py'))

        assert risk['risk_score'] == 50
        assert risk['risk_level'] == 'MEDIUM'

    @pytest.mark.parametrize(
        'filename, min_score',
        [
            ('deploy/config.yaml', 60),
            ('prod/secrets.env', 65),
            ('terraform/main.tf', 65),
            ('.github/workflows/ci.yml', 65),
            ('helm/values.yaml', 65),
            ('k8s/deployment.yaml', 65),
        ],
    )
    def test_high_risk_paths_increase_score(self, filename, min_score):
        risk = compute_risk(potential_secret_factory(filename=filename))

        assert risk['risk_score'] >= min_score
        # At least one reason must explain the increase.
        assert risk['risk_reasons']

    @pytest.mark.parametrize(
        'filename, max_score',
        [
            ('tests/test_app.py', 40),
            ('test/test_app.py', 40),
            ('docs/example.md', 45),
            ('examples/demo.py', 40),
            ('fixtures/data.json', 40),
        ],
    )
    def test_low_risk_paths_decrease_score(self, filename, max_score):
        risk = compute_risk(potential_secret_factory(filename=filename))

        assert risk['risk_score'] <= max_score

    def test_high_risk_secret_type(self):
        private_key = potential_secret_factory(
            type_='Private Key',
            filename='src/app.py',
        )

        risk = compute_risk(private_key)

        assert risk['risk_score'] == 60
        assert 'high-risk secret type: Private Key' in risk['risk_reasons']

    def test_verified_secret_increases_score(self):
        verified = PotentialSecret(
            'type',
            'src/app.py',
            'secret',
            lineno=1,
            is_verified=True,
        )

        risk = compute_risk(verified)

        assert risk['risk_score'] == 65
        assert 'verified' in risk['context_tags']

    def test_sensitive_keyword_in_path(self):
        risk = compute_risk(
            potential_secret_factory(filename='config/password.ini'),
        )

        assert risk['risk_score'] == 55
        assert 'sensitive keyword in path' in risk['risk_reasons']

    def test_score_clamped_to_range(self):
        # Several high-risk signals are stacked on a single path.
        stacked = potential_secret_factory(
            type_='Private Key',
            filename='prod/deploy/terraform/.env',
        )

        risk = compute_risk(stacked)

        assert 0 <= risk['risk_score'] <= 100

    def test_risk_level_high(self):
        risky = potential_secret_factory(
            type_='Private Key',
            filename='prod/deploy/values.yaml',
        )

        assert compute_risk(risky)['risk_level'] == 'HIGH'

    def test_risk_level_low(self):
        harmless = potential_secret_factory(filename='tests/fixtures/mock.py')

        assert compute_risk(harmless)['risk_level'] == 'LOW'

    def test_context_tags_populated(self):
        risk = compute_risk(
            potential_secret_factory(filename='deploy/config.yaml'),
        )

        assert 'deployment' in risk['context_tags']

    def test_multiple_tags(self):
        risk = compute_risk(
            potential_secret_factory(filename='prod/deploy/config.yaml'),
        )

        for expected_tag in ('production', 'deployment'):
            assert expected_tag in risk['context_tags']


class TestAnnotateSecrets:
    """Checks that annotate_secrets fills in risk fields on a collection."""

    def test_annotates_collection(self):
        collection = secrets_collection_factory(
            secrets=[
                {'filename': 'deploy/app.py', 'secret': 'abc'},
                {'filename': 'tests/test.py', 'secret': 'def'},
            ],
        )

        annotate_secrets(collection)

        annotated = [
            found
            for path in collection.data
            for found in collection.data[path]
        ]
        for found in annotated:
            assert found.risk_score is not None
            assert found.risk_level is not None
            assert isinstance(found.risk_reasons, list)
            assert isinstance(found.context_tags, list)

    def test_annotated_secrets_appear_in_json(self):
        collection = secrets_collection_factory(
            secrets=[{'filename': 'prod/app.py', 'secret': 'abc'}],
        )

        annotate_secrets(collection)

        risk_keys = ('risk_score', 'risk_level', 'risk_reasons', 'context_tags')
        for found in collection.data['prod/app.py']:
            serialized = found.json()
            for key in risk_keys:
                assert key in serialized


class TestRiskScoringOptIn:
    """Checks that risk metadata is serialized only after annotation."""

    RISK_KEYS = ('risk_score', 'risk_level', 'risk_reasons', 'context_tags')

    def test_no_risk_metadata_by_default(self):
        serialized = potential_secret_factory().json()

        for key in self.RISK_KEYS:
            assert key not in serialized

    def test_risk_metadata_present_after_annotation(self):
        found = potential_secret_factory(filename='deploy/app.py')

        # Copy the computed values onto the secret by hand.
        computed = compute_risk(found)
        for field in self.RISK_KEYS:
            setattr(found, field, computed[field])

        serialized = found.json()
        assert serialized['risk_score'] >= 60
        assert serialized['risk_level'] in ('LOW', 'MEDIUM', 'HIGH')