Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions agents/agents/agents/agent_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import warnings
import logging
from .chain.streaming_observer import ConsoleStreamObserver, StreamingManager
from .utils.tokenizer import create_tokenizer
from .utils.tokenizer import create_processor, create_tokenizer
from .backend_config import BACKEND_CONFIGS
try:
from verl.protocol import DataProto
Expand All @@ -43,14 +43,14 @@ def __init__(
system_prompt: str = None,
tools: List = None,
max_length: int=8192,
debug: bool = False,
backend: str = "transformers",
backend_config: Any = None,
reward_fn: Callable = None,
log_file: str = "agent",
project_name: str = None,
run_name: str = None,
streaming: str = "console",
debug: bool = False,
**kwargs # To pass other unused arguments
):
"""
Expand All @@ -65,6 +65,7 @@ def __init__(
"""
torch.set_printoptions(threshold=10_000)
self.logger = get_logger(directory=os.path.join(AGENT_DATA_DIR, "debug"), filename=log_file, level="DEBUG" if debug else "INFO")
self.debug = debug
self.backend = backend
self.template = template
self.max_length = max_length
Expand All @@ -87,6 +88,8 @@ def __init__(

# Create appropriate tokenizer for trajectory processing
self.tokenizer = create_tokenizer(model_name_or_path)

self.processor = create_processor(model_name_or_path)

self._reward_fn = reward_fn

Expand All @@ -105,8 +108,7 @@ def __init__(
raise ValueError(f"Streaming mode {streaming} is not supported.")
super().__init__()
if kwargs:
# warnings.warn(f"Unused arguments for agent initialization: {kwargs}")
raise ValueError(f"Unused arguments for agent initialization: {kwargs}")
warnings.warn(f"Unused arguments for agent initialization: {kwargs}")

def _init_llm_engine(self, model_name_or_path: str, backend: str):
if isinstance(model_name_or_path, str):
Expand Down Expand Up @@ -206,7 +208,7 @@ def trajectories(self):

return trajectories

def tokenize_trajectories(self, tokenizer, return_action_mask: bool = False, return_reward_mask: bool = False):
def tokenize_trajectories(self, tokenizer = None, return_reward_mask: bool = False):
if tokenizer is None:
tokenizer = self.tokenizer

Expand Down Expand Up @@ -318,7 +320,7 @@ def rewards(self):


def get_verl_data_proto(self):
inputs, other_info_list = self.tokenize_trajectories(return_action_mask=True, return_reward_mask=True)
inputs, other_info_list = self.tokenize_trajectories(return_reward_mask=True)
group_ids = np.array([info["group_id"] for info in other_info_list], dtype=object)
# Do evaluation here
reward_values, other_values = self.rewards
Expand Down
52 changes: 28 additions & 24 deletions agents/agents/agents/auto.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Dict, List, Optional, Type, Union
from typing import Any, Callable, Dict, List, Optional, Type, Union

from .specialized.think_agent import ThinkAgent
from agents.agents.specialized.openai_agent import OpenAIAgent
Expand All @@ -8,8 +8,7 @@
from .specialized.code_agent import CodeAgent
from ..rewards.reward_base import get_reward_from_name

# Registry for agent types - will be populated dynamically
AGENT_MAPPING = {}


class AutoAgent:
"""
Expand All @@ -22,7 +21,7 @@ class AutoAgent:
These agents are registered automatically. Additional custom agents can be
registered using the register_agent method.
"""

AGENT_MAPPING = {}
@classmethod
def register_agent(cls, agent_type: str, agent_class: Type[BaseAgent]) -> None:
"""
Expand All @@ -32,7 +31,7 @@ def register_agent(cls, agent_type: str, agent_class: Type[BaseAgent]) -> None:
agent_type: The name identifier for the agent type (e.g., 'react', 'code')
agent_class: The agent class to instantiate for this type
"""
AGENT_MAPPING[agent_type.lower()] = agent_class
cls.AGENT_MAPPING[agent_type.lower()] = agent_class

@classmethod
def _get_agent_class(cls, agent_type: str) -> Type[BaseAgent]:
Expand All @@ -50,11 +49,11 @@ def _get_agent_class(cls, agent_type: str) -> Type[BaseAgent]:
"""
agent_type = agent_type.lower()

if agent_type not in AGENT_MAPPING:
available_types = list(AGENT_MAPPING.keys())
if agent_type not in cls.AGENT_MAPPING:
available_types = list(cls.AGENT_MAPPING.keys())
raise ValueError(f"Unknown agent type: '{agent_type}'. Available types: {available_types}")

return AGENT_MAPPING[agent_type]
return cls.AGENT_MAPPING[agent_type]

@classmethod
def from_config(cls, config: Dict[str, Any]) -> BaseAgent:
Expand All @@ -81,27 +80,36 @@ def from_config(cls, config: Dict[str, Any]) -> BaseAgent:
An initialized agent instance.
"""
# Extract and validate required parameters
if config is None:
raise ValueError("Config could not be None")

# construct a copy for agent_kwargs
agent_kwargs = {}
for k, v in config.items():
agent_kwargs[k] = v

required_params = ["agent_type", "template", "tools", "backend"]
missing_params = [param for param in required_params if not config.get(param)]

if missing_params:
raise ValueError(f"Missing required parameters: {', '.join(missing_params)}")

agent_type = config["agent_type"]
agent_kwargs.pop("agent_type")
tools = get_tools_from_names(config["tools"])
agent_class = cls._get_agent_class(agent_type)
reward_name = config.get("reward_name")
if reward_name is not None:
reward_fn = get_reward_from_name(reward_name)
agent_kwargs.pop("reward_name")
else:
reward_fn = None

# construct a copy for agent_kwargs
agent_kwargs = {}
for k, v in config.items():
agent_kwargs[k] = v

agent_kwargs.pop("agent_type")
agent_kwargs['tools'] = tools
if "reward_name" in config and config["reward_name"] is not None:
agent_kwargs.pop("reward_name")
reward_fn = get_reward_from_name(config["reward_name"])
agent_kwargs["reward_fn"] = reward_fn
agent_kwargs['reward_fn'] = reward_fn

if "use_agent" in agent_kwargs:
agent_kwargs.pop("use_agent")

agent = agent_class(**agent_kwargs)

Expand All @@ -114,11 +122,9 @@ def from_pretrained(
agent_type: str,
template: str,
tools: Optional[List] = None,
vllm: bool = False,
debug: bool = False,
log_file: str = "agent",
wrapper: bool = False,
reward_name: Optional[str] = None,
reward_fn: Optional[Callable] = None,
**kwargs
) -> BaseAgent:
"""
Expand Down Expand Up @@ -147,11 +153,9 @@ def from_pretrained(
"model_name_or_path": model_name_or_path,
"template": template,
"tools": tools or [],
"vllm": vllm,
"debug": debug,
"log_file": log_file,
"wrapper": wrapper,
"reward_name": reward_name,
"reward_fn": reward_fn,
**kwargs
}

Expand Down
38 changes: 34 additions & 4 deletions agents/agents/agents/llm_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
import asyncio
from asyncore import loop
from collections import deque
import copy
from functools import partial
import time
from typing import Dict, Any, List, Optional, Callable, AsyncGenerator
import uuid
from .templates.utils import convert_messages_to_openai_format
import numpy as np
from tenacity import retry, stop_after_attempt, wait_exponential
import torch
Expand All @@ -24,8 +24,8 @@
import logging
import PIL


LOGGER = logging.getLogger(__name__)
LOGGER.setLevel(logging.DEBUG)

try:
from verl.protocol import DataProto
Expand Down Expand Up @@ -353,14 +353,29 @@ def _process_inputs(self, prompts: List[str], vision_inputs: Dict[str, List[PIL.

def generate(self, messages_list: str, **kwargs) -> str:
raise NotImplementedError("Async Verl backend does not support sync generation")

def _convert_to_openai_chat_without_tool_call_processing(self, messages: list) -> list:
"""
We use the pure generated content as the history. So we don't want any tool call to be part of the history.
This is used when models are not openai's official models like GPT-4o.
"""
messages = copy.deepcopy(messages)
for message in messages:
if "tool_calls" in message:
del message["tool_calls"]
if "tool_call_id" in message:
del message["tool_call_id"]
if "tool_choice" in message:
del message["tool_choice"]
return messages

async def generate_async(self, messages_list: str, **kwargs) -> str:
"""Generate text from prompt using Verl"""
# We need to build a DataProto from the prompts

generation_config = {}
tensors = torch.ones(len(messages_list), dtype=torch.int64)
messages_list = [convert_messages_to_openai_format(messages) for messages in messages_list]
messages_list = [self._convert_to_openai_chat_without_tool_call_processing(messages) for messages in messages_list]
tools = kwargs.get("tools", None)
tools_list = np.array([tools] * len(messages_list))
data = {"input_ids": tensors, "raw_prompt": np.array(messages_list), "tools": tools_list}
Expand Down Expand Up @@ -457,6 +472,21 @@ async def _call(self, messages: List[List[Dict]], **kw) -> str:
loop = asyncio.get_running_loop()
return await loop.run_in_executor(None, partial(self._blocking_call, messages, **kw))

def _convert_to_openai_chat_without_tool_call_processing(self, messages: list) -> list:
"""
We use the pure generated content as the history. So we don't want any tool call to be part of the history.
This is used when models are not openai's official models like GPT-4o.
TODO: we need to add support for openai models
"""
messages = copy.deepcopy(messages)
for message in messages:
if "tool_calls" in message:
del message["tool_calls"]
if "tool_call_id" in message:
del message["tool_call_id"]
if "tool_choice" in message:
del message["tool_choice"]
return messages

# Public API ‑‑ sync or async depending on caller's context
def async_generate(
Expand All @@ -478,7 +508,7 @@ def async_generate(
else:
messages_list = messages # batch
print(f"[ClientBackend] messages_list: {messages_list}")
messages_list = [convert_messages_to_openai_format(messages) for messages in messages_list]
messages_list = [self._convert_to_openai_chat_without_tool_call_processing(messages) for messages in messages_list]

async def _runner():
tasks = [asyncio.create_task(self._call(_input, **kwargs)) for _input in messages_list]
Expand Down
1 change: 0 additions & 1 deletion agents/agents/agents/react/react_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,6 @@ def __init__(self,
model_name_or_path=model_name_or_path,
tools=tools,
system_prompt=system_prompt,
max_length=8192,
**kwargs
)

Expand Down
20 changes: 1 addition & 19 deletions agents/agents/agents/templates/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,22 +22,6 @@ def strip_ansi(s: str) -> str:
return ANSI_RE.sub('', s)


def convert_messages_to_openai_format(messages: list) -> list:
    """
    Convert messages to OpenAI format.

    Currently this only strips the ``tool_choice`` key from each message;
    ``tool_calls`` and ``tool_call_id`` are intentionally preserved.
    TODO: add more processing for other types of content

    Args:
        messages: List of chat-message dicts.

    Returns:
        A deep copy of ``messages`` with ``tool_choice`` removed from
        every message. The caller's list and dicts are never mutated.
    """
    # Deep-copy so the caller's message objects stay untouched.
    messages = copy.deepcopy(messages)
    for message in messages:
        # pop with a default replaces the check-then-delete pattern.
        message.pop("tool_choice", None)
    return messages


def convert_messages_to_hf_format(messages: list) -> list:
"""
Convert messages to Hugging Face format.
Expand Down Expand Up @@ -305,9 +289,7 @@ def compare_hf_template(tokenizer, template_name, messages=None, tools=None, add
plain_highlighted_prompt = strip_ansi(highlighted_prompt)
is_equal_between_implemented_prompts = implemented_prompt == plain_highlighted_prompt
jinja_template = chat.template.jinja_template()
# Save jinja template to file
with open("jinja_template.jinja", "w") as f:
f.write(jinja_template)

tokenizer.chat_template = jinja_template
implemented_jinja_prompt = tokenizer.apply_chat_template(messages, tokenize=False, tools=tools, add_generation_prompt=add_generation_prompt)
is_equal_between_jinja_prompts = implemented_jinja_prompt == implemented_prompt
Expand Down
11 changes: 10 additions & 1 deletion agents/agents/agents/utils/tokenizer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from transformers import AutoTokenizer
from transformers import AutoProcessor, AutoTokenizer

def create_tokenizer(model_name_or_path: str):
try:
Expand All @@ -8,3 +8,12 @@ def create_tokenizer(model_name_or_path: str):
tokenizer = None

return tokenizer


def create_processor(model_name_or_path: str):
    """
    Build an ``AutoProcessor`` for the given model.

    Args:
        model_name_or_path: Hugging Face model id or local checkpoint path.

    Returns:
        The loaded processor, or ``None`` when the checkpoint cannot be
        resolved (``OSError`` from ``from_pretrained``).
    """
    try:
        return AutoProcessor.from_pretrained(model_name_or_path)
    except OSError:
        return None
1 change: 1 addition & 0 deletions agents/tests/unit/agents/mock_tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Mock tests package for agents
Loading