Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
325 changes: 160 additions & 165 deletions bluebox/agents/routine_discovery_agent_beta.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion bluebox/agents/specialists/abstract_specialist.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ class AutonomousConfig(NamedTuple):
Configuration for autonomous specialist runs. Helps manage their "lifecycles."
"""
min_iterations: int = 3 # Minimum iterations before finalize tools become available
max_iterations: int = 10 # Maximum iterations before loop exits (returns None if not finalized)
max_iterations: int = 20 # Maximum iterations before loop exits (returns None if not finalized)


class AbstractSpecialist(AbstractAgent):
Expand Down
8 changes: 4 additions & 4 deletions bluebox/agents/specialists/network_specialist.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ def _search_responses_by_terms(self, terms: list[str]) -> dict[str, Any]:
Search RESPONSE bodies by a list of terms.

Searches HTML/JSON response bodies (excludes JS, images, media) and returns
top 10-20 entries ranked by relevance score. Pass 20-30 search terms for best results.
top 50 entries ranked by relevance score. Pass 20-30 search terms for best results.

Args:
terms: List of 20-30 search terms to look for in response bodies.
Expand All @@ -250,7 +250,7 @@ def _search_responses_by_terms(self, terms: list[str]) -> dict[str, Any]:
if not terms:
return {"error": "No search terms provided"}

results = self._network_data_loader.search_entries_by_terms(terms, top_n=20)
results = self._network_data_loader.search_entries_by_terms(terms, top_n=50)

if not results:
return {
Expand Down Expand Up @@ -450,7 +450,7 @@ def _search_requests_by_terms(
return {
"terms_searched": len(terms),
"results_found": len(results),
"results": results[:20], # Top 20
"results": results[:50], # Top 50
}

@agent_tool()
Expand Down Expand Up @@ -502,5 +502,5 @@ def _search_response_bodies(
"case_sensitive": case_sensitive,
"regex": regex,
"results_found": len(results),
"results": results[:20], # Top 20
"results": results[:50], # Top 50
}
96 changes: 84 additions & 12 deletions bluebox/agents/specialists/value_trace_resolver_specialist.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,26 +85,98 @@ class ValueTraceResolverSpecialist(AbstractSpecialist):
## Your Mission

Find the ORIGINAL source of a token/value and trace how it propagates.
**BE PERSISTENT** - try MULTIPLE search strategies before giving up!

## Process
## Search Strategy (try ALL of these!)

1. **Search**: Use `search_everywhere` to find all occurrences
2. **Analyze**: Examine entries for context and timestamps
3. **Trace**: Determine the flow (e.g., API response -> cookie -> request header)
4. **Finalize**: Call the appropriate finalize tool with your findings
1. **Full Value Search**: Search for the COMPLETE exact value
2. **Strip Prefixes**: If value has "Bearer ", "Basic ", etc., strip it and search the token alone
3. **Partial Search**: Search for just the FIRST 50-100 characters (for very long tokens)
4. **Case Variations**: Try `case_sensitive=False` if the `case_sensitive=True` search fails
5. **JWT Segment Search**: JWTs have 3 dot-separated parts (header.payload.signature) - try searching for each part separately
6. **Multiple Stores**: Check network, storage, AND window properties separately
7. **Related Values**: Look for field names like "token", "accessToken", "auth", "jwt" near the timestamp

## Multi-Attempt Process

**DO NOT give up after one search!** Follow this workflow:

1. **Attempt 1**: `search_everywhere(full_value, case_sensitive=True)`
2. **Attempt 2**: If Authorization/Bearer token, strip "Bearer " prefix and search again
3. **Attempt 3**: Search first 80 chars of the token (helps with very long values)
4. **Attempt 4**: Try case-insensitive: `search_everywhere(value, case_sensitive=False)`
5. **Attempt 5**: For JWTs, extract and search just the payload section
6. **Attempt 6**: Use `search_network_responses` with just a unique substring
7. **Analyze timestamps**: Look at transactions around the time the value was first used

**ONLY finalize with failure after trying AT LEAST 5 different search approaches!**

## What to Look For

- First occurrence (by timestamp) is often the original source
- Network responses often set values that end up in storage
- Storage values (cookies) are often sent in subsequent request headers
- Look for API endpoints with keywords: "token", "auth", "login", "session"

**CRITICAL - Filter for Successful Responses:**
- When you find a token in network responses, CHECK THE STATUS CODE!
- ONLY use responses with status 200-299 (successful)
- IGNORE responses with status 400-599 (errors like 403 Forbidden, 404 Not Found)
- If you see "Access Denied" or "Forbidden", that's a FAILED request - keep searching!
- Look for the same endpoint with status 201 (Created) or 200 (OK)

## Authorization Header Handling

**CRITICAL**: Authorization tokens can be in the network data, storage, or window properties!
- Tokens are stored WITHOUT prefix in responses: `{"token": "eyJh..."}`
- Tokens are used WITH prefix in requests: `"Authorization: Bearer eyJh..."`
- Search for the token WITHOUT "Bearer " if initial search fails
- Try searching just the first ~20 characters of the token (JWTs typically start with "eyJ") to find the response

## Source Preference

**PREFER NETWORK (transaction) SOURCES over storage.** When a value appears in
both a prior transaction response AND browser storage (cookie, localStorage,
sessionStorage), report the transaction response as the primary source.
Storage may be empty in a fresh browser session, making it unreliable.

## Iteration Budget

You have up to 20 iterations. Use them ALL if needed! Don't give up early.

## How to Finalize (IMPORTANT!)

When you've found the origin, call the appropriate finalize tool:

**If output schema is provided** (check your system prompt for "Expected Output Schema"):
- Use `finalize_with_output(output={...})` with a DICT matching the schema
- Example with schema:
```python
finalize_with_output(output={
"origin_summary": "Token from /api/token endpoint at path data.token",
"primary_source": {
"type": "network_response",
"request_id": "interception-job-716.0",
"url": "https://api.example.com/token",
"method": "POST",
"location": "data.token"
}
})
```

**If NO output schema** (no schema section in system prompt):
- Use `finalize_result(output={...})` with a DICT (not a string!)
- Example without schema:
```python
finalize_result(output={
"summary": "Token originates from...",
"source_transaction": "interception-job-716.0",
"path": "data.token"
})
```

**NEVER** call finalize without the `output` parameter!
**NEVER** pass a string to `output` - it must be a dict/object!
""").strip()

## Magic methods
Expand Down Expand Up @@ -247,7 +319,7 @@ def _search_everywhere(
results["network"] = {
"found": len(network_results) > 0,
"count": len(network_results),
"matches": network_results[:10],
"matches": network_results[:30],
}
else:
results["network"] = {"available": False}
Expand All @@ -260,7 +332,7 @@ def _search_everywhere(
results["storage"] = {
"found": len(storage_results) > 0,
"count": len(storage_results),
"matches": storage_results[:10],
"matches": storage_results[:30],
}
else:
results["storage"] = {"available": False}
Expand All @@ -273,7 +345,7 @@ def _search_everywhere(
results["window_properties"] = {
"found": len(window_results) > 0,
"count": len(window_results),
"matches": window_results[:10],
"matches": window_results[:30],
}
else:
results["window_properties"] = {"available": False}
Expand Down Expand Up @@ -322,7 +394,7 @@ def _search_in_network(
return {
"value_searched": value,
"results_found": len(results),
"results": results[:20],
"results": results[:50],
}

@agent_tool(availability=lambda self: self._storage_data_loader is not None)
Expand Down Expand Up @@ -354,7 +426,7 @@ def _search_in_storage(
return {
"value_searched": value,
"results_found": len(results),
"results": results[:20],
"results": results[:50],
}

@agent_tool(availability=lambda self: self._window_property_data_loader is not None)
Expand Down Expand Up @@ -386,7 +458,7 @@ def _search_in_window_props(
return {
"value_searched": value,
"results_found": len(results),
"results": results[:20],
"results": results[:50],
}

@agent_tool(availability=lambda self: self._network_data_loader is not None)
Expand Down Expand Up @@ -502,7 +574,7 @@ def _get_storage_by_key(self, key: str) -> dict[str, Any]:
return {
"key": key,
"entries_found": len(entries),
"entries": [e.model_dump() for e in entries[:20]],
"entries": [e.model_dump() for e in entries[:50]],
}

@agent_tool()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -676,7 +676,7 @@ def test_default_values(self) -> None:
"""AutonomousConfig has sensible defaults."""
config = AutonomousConfig()
assert config.min_iterations == 3
assert config.max_iterations == 10
assert config.max_iterations == 20

def test_custom_values(self) -> None:
"""AutonomousConfig accepts custom values."""
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_code_execution_sandbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -841,7 +841,7 @@ def test_docker_security_flags(self) -> None:
assert "--read-only" in docker_cmd
assert "--memory" in docker_cmd
assert "--user" in docker_cmd
assert "nobody" in docker_cmd
assert f"{os.getuid()}:{os.getgid()}" in docker_cmd
assert "--security-opt" in docker_cmd
assert "no-new-privileges" in docker_cmd

Expand Down