Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions agents/agents/agents/agent_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import warnings
import logging
from .chain.streaming_observer import ConsoleStreamObserver, StreamingManager
from .utils.tokenizer import create_tokenizer
from .utils.tokenizer import create_processor, create_tokenizer
from .backend_config import BACKEND_CONFIGS
try:
from verl.protocol import DataProto
Expand All @@ -43,14 +43,14 @@ def __init__(
system_prompt: str = None,
tools: List = None,
max_length: int=8192,
debug: bool = False,
backend: str = "transformers",
backend_config: Any = None,
reward_fn: Callable = None,
log_file: str = "agent",
project_name: str = None,
run_name: str = None,
streaming: str = "console",
debug: bool = False,
**kwargs # To pass other unused arguments
):
"""
Expand All @@ -65,6 +65,7 @@ def __init__(
"""
torch.set_printoptions(threshold=10_000)
self.logger = get_logger(directory=os.path.join(AGENT_DATA_DIR, "debug"), filename=log_file, level="DEBUG" if debug else "INFO")
self.debug = debug
self.backend = backend
self.template = template
self.max_length = max_length
Expand All @@ -87,6 +88,8 @@ def __init__(

# Create appropriate tokenizer for trajectory processing
self.tokenizer = create_tokenizer(model_name_or_path)

self.processor = create_processor(model_name_or_path)

self._reward_fn = reward_fn

Expand All @@ -105,8 +108,7 @@ def __init__(
raise ValueError(f"Streaming mode {streaming} is not supported.")
super().__init__()
if kwargs:
# warnings.warn(f"Unused arguments for agent initialization: {kwargs}")
raise ValueError(f"Unused arguments for agent initialization: {kwargs}")
warnings.warn(f"Unused arguments for agent initialization: {kwargs}")

def _init_llm_engine(self, model_name_or_path: str, backend: str):
if isinstance(model_name_or_path, str):
Expand Down Expand Up @@ -206,7 +208,7 @@ def trajectories(self):

return trajectories

def tokenize_trajectories(self, tokenizer, return_action_mask: bool = False, return_reward_mask: bool = False):
def tokenize_trajectories(self, tokenizer = None, return_reward_mask: bool = False):
if tokenizer is None:
tokenizer = self.tokenizer

Expand Down Expand Up @@ -318,7 +320,7 @@ def rewards(self):


def get_verl_data_proto(self):
inputs, other_info_list = self.tokenize_trajectories(return_action_mask=True, return_reward_mask=True)
inputs, other_info_list = self.tokenize_trajectories(return_reward_mask=True)
group_ids = np.array([info["group_id"] for info in other_info_list], dtype=object)
# Do evaluation here
reward_values, other_values = self.rewards
Expand Down
52 changes: 28 additions & 24 deletions agents/agents/agents/auto.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Dict, List, Optional, Type, Union
from typing import Any, Callable, Dict, List, Optional, Type, Union

from .specialized.think_agent import ThinkAgent
from agents.agents.specialized.openai_agent import OpenAIAgent
Expand All @@ -8,8 +8,7 @@
from .specialized.code_agent import CodeAgent
from ..rewards.reward_base import get_reward_from_name

# Registry for agent types - will be populated dynamically
AGENT_MAPPING = {}


class AutoAgent:
"""
Expand All @@ -22,7 +21,7 @@ class AutoAgent:
These agents are registered automatically. Additional custom agents can be
registered using the register_agent method.
"""

AGENT_MAPPING = {}
@classmethod
def register_agent(cls, agent_type: str, agent_class: Type[BaseAgent]) -> None:
"""
Expand All @@ -32,7 +31,7 @@ def register_agent(cls, agent_type: str, agent_class: Type[BaseAgent]) -> None:
agent_type: The name identifier for the agent type (e.g., 'react', 'code')
agent_class: The agent class to instantiate for this type
"""
AGENT_MAPPING[agent_type.lower()] = agent_class
cls.AGENT_MAPPING[agent_type.lower()] = agent_class

@classmethod
def _get_agent_class(cls, agent_type: str) -> Type[BaseAgent]:
Expand All @@ -50,11 +49,11 @@ def _get_agent_class(cls, agent_type: str) -> Type[BaseAgent]:
"""
agent_type = agent_type.lower()

if agent_type not in AGENT_MAPPING:
available_types = list(AGENT_MAPPING.keys())
if agent_type not in cls.AGENT_MAPPING:
available_types = list(cls.AGENT_MAPPING.keys())
raise ValueError(f"Unknown agent type: '{agent_type}'. Available types: {available_types}")

return AGENT_MAPPING[agent_type]
return cls.AGENT_MAPPING[agent_type]

@classmethod
def from_config(cls, config: Dict[str, Any]) -> BaseAgent:
Expand All @@ -81,27 +80,36 @@ def from_config(cls, config: Dict[str, Any]) -> BaseAgent:
An initialized agent instance.
"""
# Extract and validate required parameters
if config is None:
raise ValueError("Config could not be None")

# construct a copy for agent_kwargs
agent_kwargs = {}
for k, v in config.items():
agent_kwargs[k] = v

required_params = ["agent_type", "template", "tools", "backend"]
missing_params = [param for param in required_params if not config.get(param)]

if missing_params:
raise ValueError(f"Missing required parameters: {', '.join(missing_params)}")

agent_type = config["agent_type"]
agent_kwargs.pop("agent_type")
tools = get_tools_from_names(config["tools"])
agent_class = cls._get_agent_class(agent_type)
reward_name = config.get("reward_name")
if reward_name is not None:
reward_fn = get_reward_from_name(reward_name)
agent_kwargs.pop("reward_name")
else:
reward_fn = None

# construct a copy for agent_kwargs
agent_kwargs = {}
for k, v in config.items():
agent_kwargs[k] = v

agent_kwargs.pop("agent_type")
agent_kwargs['tools'] = tools
if "reward_name" in config and config["reward_name"] is not None:
agent_kwargs.pop("reward_name")
reward_fn = get_reward_from_name(config["reward_name"])
agent_kwargs["reward_fn"] = reward_fn
agent_kwargs['reward_fn'] = reward_fn

if "use_agent" in agent_kwargs:
agent_kwargs.pop("use_agent")

agent = agent_class(**agent_kwargs)

Expand All @@ -114,11 +122,9 @@ def from_pretrained(
agent_type: str,
template: str,
tools: Optional[List] = None,
vllm: bool = False,
debug: bool = False,
log_file: str = "agent",
wrapper: bool = False,
reward_name: Optional[str] = None,
reward_fn: Optional[Callable] = None,
**kwargs
) -> BaseAgent:
"""
Expand Down Expand Up @@ -147,11 +153,9 @@ def from_pretrained(
"model_name_or_path": model_name_or_path,
"template": template,
"tools": tools or [],
"vllm": vllm,
"debug": debug,
"log_file": log_file,
"wrapper": wrapper,
"reward_name": reward_name,
"reward_fn": reward_fn,
**kwargs
}

Expand Down
38 changes: 34 additions & 4 deletions agents/agents/agents/llm_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
import asyncio
from asyncore import loop
from collections import deque
import copy
from functools import partial
import time
from typing import Dict, Any, List, Optional, Callable, AsyncGenerator
import uuid
from .templates.utils import convert_messages_to_openai_format
import numpy as np
from tenacity import retry, stop_after_attempt, wait_exponential
import torch
Expand All @@ -24,8 +24,8 @@
import logging
import PIL


LOGGER = logging.getLogger(__name__)
LOGGER.setLevel(logging.DEBUG)

try:
from verl.protocol import DataProto
Expand Down Expand Up @@ -353,14 +353,29 @@ def _process_inputs(self, prompts: List[str], vision_inputs: Dict[str, List[PIL.

def generate(self, messages_list: str, **kwargs) -> str:
raise NotImplementedError("Async Verl backend does not support sync generation")

def _convert_to_openai_chat_without_tool_call_processing(self, messages: list) -> list:
"""
We use the pure generated content as the history. So we don't want any tool call to be part of the history.
This is used when models are not openai's official models like GPT-4o.
"""
messages = copy.deepcopy(messages)
for message in messages:
if "tool_calls" in message:
del message["tool_calls"]
if "tool_call_id" in message:
del message["tool_call_id"]
if "tool_choice" in message:
del message["tool_choice"]
return messages

async def generate_async(self, messages_list: str, **kwargs) -> str:
"""Generate text from prompt using Verl"""
# We need to build a DataProto from the prompts

generation_config = {}
tensors = torch.ones(len(messages_list), dtype=torch.int64)
messages_list = [convert_messages_to_openai_format(messages) for messages in messages_list]
messages_list = [self._convert_to_openai_chat_without_tool_call_processing(messages) for messages in messages_list]
tools = kwargs.get("tools", None)
tools_list = np.array([tools] * len(messages_list))
data = {"input_ids": tensors, "raw_prompt": np.array(messages_list), "tools": tools_list}
Expand Down Expand Up @@ -457,6 +472,21 @@ async def _call(self, messages: List[List[Dict]], **kw) -> str:
loop = asyncio.get_running_loop()
return await loop.run_in_executor(None, partial(self._blocking_call, messages, **kw))

def _convert_to_openai_chat_without_tool_call_processing(self, messages: list) -> list:
"""
We use the pure generated content as the history. So we don't want any tool call to be part of the history.
This is used when models are not openai's official models like GPT-4o.
TODO: we need to add support for openai models
"""
messages = copy.deepcopy(messages)
for message in messages:
if "tool_calls" in message:
del message["tool_calls"]
if "tool_call_id" in message:
del message["tool_call_id"]
if "tool_choice" in message:
del message["tool_choice"]
return messages

# Public API ‑‑ sync or async depending on caller's context
def async_generate(
Expand All @@ -478,7 +508,7 @@ def async_generate(
else:
messages_list = messages # batch
print(f"[ClientBackend] messages_list: {messages_list}")
messages_list = [convert_messages_to_openai_format(messages) for messages in messages_list]
messages_list = [self._convert_to_openai_chat_without_tool_call_processing(messages) for messages in messages_list]

async def _runner():
tasks = [asyncio.create_task(self._call(_input, **kwargs)) for _input in messages_list]
Expand Down
1 change: 0 additions & 1 deletion agents/agents/agents/react/react_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,6 @@ def __init__(self,
model_name_or_path=model_name_or_path,
tools=tools,
system_prompt=system_prompt,
max_length=8192,
**kwargs
)

Expand Down
20 changes: 1 addition & 19 deletions agents/agents/agents/templates/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,22 +22,6 @@ def strip_ansi(s: str) -> str:
return ANSI_RE.sub('', s)


def convert_messages_to_openai_format(messages: list) -> list:
    """
    Convert messages to OpenAI format.

    Currently this only strips the ``tool_choice`` key from each message;
    ``tool_calls`` and ``tool_call_id`` are intentionally preserved.
    TODO: add more processing for other types of content

    Args:
        messages: List of chat-message dicts.

    Returns:
        A deep copy of ``messages`` with ``tool_choice`` removed from
        every message. The caller's list and dicts are never mutated.
    """
    # Deep-copy so the caller's message objects stay untouched.
    messages = copy.deepcopy(messages)
    for message in messages:
        # pop with a default replaces the check-then-delete pattern.
        message.pop("tool_choice", None)
    return messages


def convert_messages_to_hf_format(messages: list) -> list:
"""
Convert messages to Hugging Face format.
Expand Down Expand Up @@ -305,9 +289,7 @@ def compare_hf_template(tokenizer, template_name, messages=None, tools=None, add
plain_highlighted_prompt = strip_ansi(highlighted_prompt)
is_equal_between_implemented_prompts = implemented_prompt == plain_highlighted_prompt
jinja_template = chat.template.jinja_template()
# Save jinja template to file
with open("jinja_template.jinja", "w") as f:
f.write(jinja_template)

tokenizer.chat_template = jinja_template
implemented_jinja_prompt = tokenizer.apply_chat_template(messages, tokenize=False, tools=tools, add_generation_prompt=add_generation_prompt)
is_equal_between_jinja_prompts = implemented_jinja_prompt == implemented_prompt
Expand Down
11 changes: 10 additions & 1 deletion agents/agents/agents/utils/tokenizer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from transformers import AutoTokenizer
from transformers import AutoProcessor, AutoTokenizer

def create_tokenizer(model_name_or_path: str):
try:
Expand All @@ -8,3 +8,12 @@ def create_tokenizer(model_name_or_path: str):
tokenizer = None

return tokenizer


def create_processor(model_name_or_path: str):
    """
    Build an ``AutoProcessor`` for the given model.

    Args:
        model_name_or_path: Hugging Face model id or local checkpoint path.

    Returns:
        The loaded processor, or ``None`` when the checkpoint cannot be
        resolved (``OSError`` from ``from_pretrained``).
    """
    try:
        return AutoProcessor.from_pretrained(model_name_or_path)
    except OSError:
        return None
1 change: 1 addition & 0 deletions agents/tests/unit/agents/mock_tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Mock tests package for agents
Loading