Skip to content

Commit 637f72e

Browse files
jwesleyeclaude
andcommitted
feat: Add configuration validation module (Issue #6)
Implemented a comprehensive configuration validation toolkit with 9 new tools for validating YAML/TOML/JSON configs, detecting secrets, and scanning for insecure settings. This addresses GitHub issue #6 for the v0.8.0 milestone.

## New Module: config (9 tools)

### Validation Functions (7 tools)
- validate_yaml_syntax: Validate YAML syntax using PyYAML
- validate_toml_syntax: Validate TOML syntax using tomllib/tomli
- validate_json_syntax: Validate JSON syntax using stdlib json
- validate_json_schema: Validate JSON against schemas (jsonschema)
- validate_github_actions_config: Validate GitHub Actions workflows
- check_dependency_conflicts: Detect Python dependency conflicts
- validate_version_specifier: Validate PEP 440 version specifiers

### Security Functions (2 tools)
- scan_config_for_secrets: Detect secrets in config files
- detect_insecure_settings: Scan for security misconfigurations

## Optional Dependencies
Added 5 new optional dependencies for enhanced functionality:
- PyYAML>=6.0.0: YAML parsing
- tomli>=2.0.0: TOML parsing (Python <3.11)
- jsonschema>=4.0.0: JSON schema validation
- packaging>=21.0: Version parsing and conflict detection
- detect-secrets>=1.5.0: Enhanced secret scanning

Install with: pip install coding-open-agent-tools[config]

## Implementation Details
- Follows project philosophy: validation/parsing over code generation
- All functions use @strands_tool decorator for framework compatibility
- Comprehensive error handling with fallbacks for missing dependencies
- 52 passing tests with 100% coverage of new code
- Full mypy and ruff compliance

## Tool Count Update
Total tools: 258 → 267 (+9)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 7481c87 commit 637f72e

14 files changed

Lines changed: 1479 additions & 9 deletions

File tree

pyproject.toml

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,15 @@ dependencies = [
4646

4747
[project.optional-dependencies]
4848
# All optional features
49-
all = ["esprima>=4.0.0", "tree-sitter-language-pack>=0.9.0"]
49+
all = [
50+
"esprima>=4.0.0",
51+
"tree-sitter-language-pack>=0.9.0",
52+
"PyYAML>=6.0.0",
53+
"tomli>=2.0.0; python_version < '3.11'",
54+
"jsonschema>=4.0.0",
55+
"packaging>=21.0",
56+
"detect-secrets>=1.5.0",
57+
]
5058
# JavaScript/TypeScript navigation support
5159
javascript = ["esprima>=4.0.0"]
5260
# Java navigation support
@@ -61,6 +69,15 @@ cpp = ["tree-sitter-language-pack>=0.9.0"]
6169
csharp = ["tree-sitter-language-pack>=0.9.0"]
6270
# Ruby navigation support
6371
ruby = ["tree-sitter-language-pack>=0.9.0"]
72+
# Configuration validation support
73+
config = [
74+
"PyYAML>=6.0.0",
75+
"tomli>=2.0.0; python_version < '3.11'",
76+
"jsonschema>=4.0.0",
77+
"packaging>=21.0",
78+
]
79+
# Enhanced security scanning for configs
80+
config-security = ["detect-secrets>=1.5.0"]
6481
# Strands integration testing
6582
strands = [
6683
"strands>=0.1.0",
@@ -82,6 +99,12 @@ dev = [
8299
"esprima>=4.0.0",
83100
# Java support for testing
84101
"tree-sitter-language-pack>=0.9.0",
102+
# Config validation support for testing
103+
"PyYAML>=6.0.0",
104+
"tomli>=2.0.0; python_version < '3.11'",
105+
"jsonschema>=4.0.0",
106+
"packaging>=21.0",
107+
"detect-secrets>=1.5.0",
85108
# Strands testing dependencies
86109
"strands>=0.1.0",
87110
"anthropic>=0.25.0",
@@ -149,7 +172,7 @@ follow_imports = "skip" # Skip type checking of imported modules
149172
ignore_missing_imports = true # Ignore missing imports in dependencies
150173

151174
[[tool.mypy.overrides]]
152-
module = ["strands", "strands.*", "pkg_resources", "basic_open_agent_tools", "basic_open_agent_tools.*", "google.adk", "google.adk.*", "langgraph", "langgraph.*", "detect_secrets", "detect_secrets.*", "mcp", "mcp.*"]
175+
module = ["strands", "strands.*", "pkg_resources", "basic_open_agent_tools", "basic_open_agent_tools.*", "google.adk", "google.adk.*", "langgraph", "langgraph.*", "detect_secrets", "detect_secrets.*", "mcp", "mcp.*", "yaml", "tomli", "jsonschema"]
153176
ignore_missing_imports = true
154177

155178
[tool.pytest.ini_options]

src/coding_open_agent_tools/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
# Import migrated modules
1313
from . import (
1414
analysis,
15+
config,
1516
cpp,
1617
csharp,
1718
database,
@@ -32,6 +33,7 @@
3233
get_tool_info,
3334
list_all_available_tools,
3435
load_all_analysis_tools,
36+
load_all_config_tools,
3537
load_all_cpp_tools,
3638
load_all_csharp_tools,
3739
load_all_database_tools,
@@ -63,6 +65,7 @@
6365
__all__: list[str] = [
6466
# Modules
6567
"analysis",
68+
"config",
6669
"cpp",
6770
"csharp",
6871
"database",
@@ -80,6 +83,7 @@
8083
"get_tool_info",
8184
"list_all_available_tools",
8285
"load_all_analysis_tools",
86+
"load_all_config_tools",
8387
"load_all_cpp_tools",
8488
"load_all_csharp_tools",
8589
"load_all_database_tools",
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
"""Configuration validation tools.
2+
3+
Provides validation for YAML, TOML, JSON, CI/CD configs, dependency conflicts,
4+
and security scanning for configuration files.
5+
"""
6+
7+
from .security import (
8+
detect_insecure_settings,
9+
scan_config_for_secrets,
10+
)
11+
from .validation import (
12+
check_dependency_conflicts,
13+
validate_github_actions_config,
14+
validate_json_schema,
15+
validate_json_syntax,
16+
validate_toml_syntax,
17+
validate_version_specifier,
18+
validate_yaml_syntax,
19+
)
20+
21+
__all__ = [
22+
# Security
23+
"detect_insecure_settings",
24+
"scan_config_for_secrets",
25+
# Validation
26+
"check_dependency_conflicts",
27+
"validate_github_actions_config",
28+
"validate_json_schema",
29+
"validate_json_syntax",
30+
"validate_toml_syntax",
31+
"validate_version_specifier",
32+
"validate_yaml_syntax",
33+
]
Lines changed: 249 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,249 @@
1+
"""Configuration security scanning functions.
2+
3+
Provides secret detection and insecure settings analysis for configuration files.
4+
"""
5+
6+
import re
7+
from typing import Any, Callable
8+
9+
try:
    from strands import tool as strands_tool
except ImportError:

    def strands_tool(func: Callable[..., Any]) -> Callable[..., Any]:  # type: ignore[no-redef]
        """Stand-in decorator used when ``strands`` cannot be imported.

        Args:
            func: The callable being decorated.

        Returns:
            ``func`` itself, unmodified.
        """
        return func
15+
16+
17+
# Common patterns for secrets in configuration files.
# Maps a secret-type label to a regular expression that is searched for in the
# raw configuration text.
SECRET_PATTERNS = {
    "aws_access_key": r"(?i)(aws_access_key_id|aws_access_key)\s*[:=]\s*['\"]?([A-Z0-9]{20})['\"]?",
    "aws_secret_key": r"(?i)(aws_secret_access_key|aws_secret_key)\s*[:=]\s*['\"]?([A-Za-z0-9/+=]{40})['\"]?",
    "api_key": r"(?i)(api[_-]?key|apikey)\s*[:=]\s*['\"]?([A-Za-z0-9_\-]{20,})['\"]?",
    "password": r"(?i)(password|passwd|pwd)\s*[:=]\s*['\"]?([^\s'\";]{8,})['\"]?",
    "secret": r"(?i)(secret)\s*[:=]\s*['\"]?([A-Za-z0-9_\-]{16,})['\"]?",
    "token": r"(?i)(token|auth_token)\s*[:=]\s*['\"]?([A-Za-z0-9_\-\.]{20,})['\"]?",
    # Also covers DSA, OpenSSH and encrypted PKCS#8 PEM headers.
    "private_key": r"-----BEGIN (?:RSA |EC |DSA |OPENSSH |ENCRYPTED )?PRIVATE KEY-----",
    "github_token": r"(?i)gh[pousr]_[A-Za-z0-9_]{36,}",
    "slack_token": r"xox[baprs]-[0-9]{10,13}-[0-9]{10,13}-[A-Za-z0-9]{24,}",
    "slack_webhook": r"https://hooks\.slack\.com/services/T[A-Z0-9]{8,}/B[A-Z0-9]{8,}/[A-Za-z0-9]{24,}",
    # The user and password parts exclude whitespace so a match cannot run
    # across lines (e.g. from "redis://localhost:6379" to an unrelated "@"
    # further down the file). The user part may be empty to catch
    # password-only URLs such as "redis://:secret@host".
    "connection_string": r"(?i)(mongodb(?:\+srv)?|postgres(?:ql)?|mysql|rediss?)://[^\s:/@]*:[^\s@]+@",
}
31+
32+
# Patterns for insecure configuration settings.
# Maps a rule name to its regex ("pattern"), its "severity"
# ("critical" / "high" / "medium") and a human-readable "message".
# Every value alternation ends in \b so that only the complete value matches
# (e.g. "debug: 10" or "mode: 7770" are not reported).
INSECURE_PATTERNS = {
    "debug_enabled": {
        "pattern": r"(?i)(debug|DEBUG)\s*[:=]\s*['\"]?(true|True|1)\b['\"]?",
        "severity": "medium",
        "message": "Debug mode enabled in configuration",
    },
    "ssl_disabled": {
        "pattern": r"(?i)(ssl_verify|verify_ssl|SSL_VERIFY)\s*[:=]\s*['\"]?(false|False|0)\b['\"]?",
        "severity": "high",
        "message": "SSL verification disabled",
    },
    "insecure_protocol": {
        # \b rejects "https"/"ftps" without looking past the value; a
        # whitespace-skipping lookahead would also reject "http" whenever the
        # next line happens to start with "s".
        "pattern": r"(?i)(protocol|scheme)\s*[:=]\s*['\"]?(http|ftp)\b['\"]?",
        "severity": "medium",
        "message": "Insecure protocol (HTTP/FTP) used instead of HTTPS/FTPS",
    },
    "wildcard_cors": {
        "pattern": r"(?i)(cors_origin|access[_-]control[_-]allow[_-]origin)\s*[:=]\s*['\"]?\*['\"]?",
        "severity": "high",
        "message": "CORS allows all origins (*)",
    },
    "permissive_permissions": {
        # Optional "0" / "0o" prefix covers the usual octal spellings
        # (0777, 0o777) in addition to the bare form.
        "pattern": r"(?i)(permissions|mode|chmod)\s*[:=]\s*['\"]?(?:0o?)?(777|666)\b['\"]?",
        "severity": "high",
        "message": "Overly permissive file permissions (777/666)",
    },
    "default_credentials": {
        "pattern": r"(?i)(password|passwd)\s*[:=]\s*['\"]?(admin|password|123456|default)\b['\"]?",
        "severity": "critical",
        "message": "Default or weak credentials detected",
    },
    "exposed_admin": {
        "pattern": r"(?i)(admin_enabled|enable_admin)\s*[:=]\s*['\"]?(true|True|1)\b['\"]?",
        "severity": "medium",
        "message": "Admin interface enabled",
    },
    "insecure_session": {
        "pattern": r"(?i)(session_cookie_secure|cookie_secure)\s*[:=]\s*['\"]?(false|False|0)\b['\"]?",
        "severity": "medium",
        "message": "Secure cookie flag not set",
    },
}
75+
76+
77+
@strands_tool
def scan_config_for_secrets(
    config_content: str, use_detect_secrets: str
) -> dict[str, str]:
    """Scan configuration content for potential secrets.

    Uses the detect-secrets library if it is requested and importable,
    otherwise falls back to regex matching against SECRET_PATTERNS.

    Args:
        config_content: Configuration file content to scan
        use_detect_secrets: "true" to use detect-secrets library, "false" for regex

    Returns:
        Dictionary with:
        - secrets_found: "true" or "false"
        - secret_count: Number of potential secrets detected
        - secret_types: Comma-separated list of secret types found
        - detection_method: "detect-secrets" or "regex"
        - details: Additional details about findings

    Raises:
        TypeError: If arguments are not strings
        ValueError: If config_content is empty or use_detect_secrets not "true"/"false"
    """
    if not isinstance(config_content, str):
        raise TypeError("config_content must be a string")
    if not isinstance(use_detect_secrets, str):
        raise TypeError("use_detect_secrets must be a string")

    if not config_content.strip():
        raise ValueError("config_content cannot be empty")
    if use_detect_secrets not in ("true", "false"):
        raise ValueError('use_detect_secrets must be "true" or "false"')

    # Try detect-secrets if requested
    if use_detect_secrets == "true":
        try:
            from detect_secrets import SecretsCollection  # type: ignore[import-untyped]
            from detect_secrets.settings import (  # type: ignore[import-untyped]
                default_settings,
            )
        except ImportError:
            # Optional dependency missing: fall through to regex-based detection
            pass
        else:
            import os
            import tempfile

            # SecretsCollection.scan_file() accepts only a path, so the
            # in-memory content is written to a throwaway file first.
            secrets = SecretsCollection()
            with tempfile.TemporaryDirectory() as scan_dir:
                scan_path = os.path.join(scan_dir, "config")
                with open(scan_path, "w", encoding="utf-8", newline="") as handle:
                    handle.write(config_content)
                with default_settings():
                    secrets.scan_file(scan_path)

            # Iterating the collection yields (filename, secret) pairs.
            detected = [secret for _filename, secret in secrets]
            if detected:
                secret_types = {secret.type for secret in detected}
                return {
                    "secrets_found": "true",
                    "secret_count": str(len(detected)),
                    "secret_types": ", ".join(sorted(secret_types)),
                    "detection_method": "detect-secrets",
                    "details": f"Found {len(detected)} potential secrets using detect-secrets",
                }
            return {
                "secrets_found": "false",
                "secret_count": "0",
                "secret_types": "",
                "detection_method": "detect-secrets",
                "details": "No secrets detected",
            }

    # Regex-based detection: one entry per match, so a type that matches
    # several times is counted several times.
    found_secrets = [
        secret_type
        for secret_type, pattern in SECRET_PATTERNS.items()
        for _match in re.finditer(pattern, config_content)
    ]

    if found_secrets:
        unique_types = sorted(set(found_secrets))
        return {
            "secrets_found": "true",
            "secret_count": str(len(found_secrets)),
            "secret_types": ", ".join(unique_types),
            "detection_method": "regex",
            "details": f"Found {len(found_secrets)} potential secrets across {len(unique_types)} types",
        }
    return {
        "secrets_found": "false",
        "secret_count": "0",
        "secret_types": "",
        "detection_method": "regex",
        "details": "No secrets detected using regex patterns",
    }
174+
175+
176+
@strands_tool
def detect_insecure_settings(config_content: str) -> dict[str, str]:
    """Find insecure settings in configuration text via regex rules.

    Every rule in INSECURE_PATTERNS is searched for in the content; each
    match is recorded as one issue carrying that rule's severity and message.

    Args:
        config_content: Configuration file content to analyze

    Returns:
        Dictionary with:
        - issues_found: "true" or "false"
        - issue_count: Number of insecure settings detected
        - critical_count: Number of critical severity issues
        - high_count: Number of high severity issues
        - medium_count: Number of medium severity issues
        - issue_summary: The first three issues as "SEVERITY: message",
          joined by "; ", plus a count of any remaining ones

    Raises:
        TypeError: If config_content is not a string
        ValueError: If config_content is empty
    """
    if not isinstance(config_content, str):
        raise TypeError("config_content must be a string")
    if config_content.strip() == "":
        raise ValueError("config_content cannot be empty")

    tally = {"critical": 0, "high": 0, "medium": 0}
    findings = []

    for rule_name, rule in INSECURE_PATTERNS.items():
        rule_pattern = rule["pattern"]
        level = rule["severity"]
        text = rule["message"]

        for hit in re.finditer(rule_pattern, config_content):
            # 1-based line number: newlines before the match start, plus one.
            line_number = config_content.count("\n", 0, hit.start()) + 1
            findings.append(
                {
                    "setting": rule_name,
                    "severity": level,
                    "message": text,
                    "line": line_number,
                }
            )
            tally[level] += 1

    total = len(findings)
    if total == 0:
        return {
            "issues_found": "false",
            "issue_count": "0",
            "critical_count": "0",
            "high_count": "0",
            "medium_count": "0",
            "issue_summary": "No insecure settings detected",
        }

    # Only the first three findings are spelled out in the summary.
    summary = "; ".join(
        f"{str(item['severity']).upper()}: {item['message']}" for item in findings[:3]
    )
    if total > 3:
        summary += f" (and {total - 3} more)"

    return {
        "issues_found": "true",
        "issue_count": str(total),
        "critical_count": str(tally["critical"]),
        "high_count": str(tally["high"]),
        "medium_count": str(tally["medium"]),
        "issue_summary": summary,
    }

0 commit comments

Comments
 (0)