From 5403dbf702a5d6241594577e2664ec31cc28dd7a Mon Sep 17 00:00:00 2001 From: Reason-Wang Date: Wed, 13 Aug 2025 12:21:29 +0000 Subject: [PATCH] Add mock tests, fix training bug --- agents/agents/agents/agent_base.py | 14 +- agents/agents/agents/auto.py | 52 +-- agents/agents/agents/llm_backend.py | 38 +- agents/agents/agents/react/react_agent.py | 1 - agents/agents/agents/templates/utils.py | 20 +- agents/agents/agents/utils/tokenizer.py | 11 +- .../tests/unit/agents/mock_tests/__init__.py | 1 + .../tests/unit/agents/mock_tests/conftest.py | 155 ++++++++ .../mock_tests/test_mock_agent_integration.py | 324 ++++++++++++++++ .../agents/mock_tests/test_mock_auto_agent.py | 294 +++++++++++++++ .../agents/mock_tests/test_mock_code_agent.py | 251 +++++++++++++ .../mock_tests/test_mock_react_agent.py | 352 ++++++++++++++++++ agents/tests/unit/agents/test_vision_agent.py | 17 +- agents/tests/unit/tools/test_code_tool.py | 18 +- verl | 2 +- 15 files changed, 1477 insertions(+), 73 deletions(-) create mode 100644 agents/tests/unit/agents/mock_tests/__init__.py create mode 100644 agents/tests/unit/agents/mock_tests/conftest.py create mode 100644 agents/tests/unit/agents/mock_tests/test_mock_agent_integration.py create mode 100644 agents/tests/unit/agents/mock_tests/test_mock_auto_agent.py create mode 100644 agents/tests/unit/agents/mock_tests/test_mock_code_agent.py create mode 100644 agents/tests/unit/agents/mock_tests/test_mock_react_agent.py diff --git a/agents/agents/agents/agent_base.py b/agents/agents/agents/agent_base.py index e878a79..43b01f2 100644 --- a/agents/agents/agents/agent_base.py +++ b/agents/agents/agents/agent_base.py @@ -17,7 +17,7 @@ import warnings import logging from .chain.streaming_observer import ConsoleStreamObserver, StreamingManager -from .utils.tokenizer import create_tokenizer +from .utils.tokenizer import create_processor, create_tokenizer from .backend_config import BACKEND_CONFIGS try: from verl.protocol import DataProto @@ -43,7 +43,6 @@ def 
__init__( system_prompt: str = None, tools: List = None, max_length: int=8192, - debug: bool = False, backend: str = "transformers", backend_config: Any = None, reward_fn: Callable = None, @@ -51,6 +50,7 @@ def __init__( project_name: str = None, run_name: str = None, streaming: str = "console", + debug: bool = False, **kwargs # To pass other unused arguments ): """ @@ -65,6 +65,7 @@ def __init__( """ torch.set_printoptions(threshold=10_000) self.logger = get_logger(directory=os.path.join(AGENT_DATA_DIR, "debug"), filename=log_file, level="DEBUG" if debug else "INFO") + self.debug = debug self.backend = backend self.template = template self.max_length = max_length @@ -87,6 +88,8 @@ def __init__( # Create appropriate tokenizer for trajectory processing self.tokenizer = create_tokenizer(model_name_or_path) + + self.processor = create_processor(model_name_or_path) self._reward_fn = reward_fn @@ -105,8 +108,7 @@ def __init__( raise ValueError(f"Streaming mode {streaming} is not supported.") super().__init__() if kwargs: - # warnings.warn(f"Unused arguments for agent initialization: {kwargs}") - raise ValueError(f"Unused arguments for agent initialization: {kwargs}") + warnings.warn(f"Unused arguments for agent initialization: {kwargs}") def _init_llm_engine(self, model_name_or_path: str, backend: str): if isinstance(model_name_or_path, str): @@ -206,7 +208,7 @@ def trajectories(self): return trajectories - def tokenize_trajectories(self, tokenizer, return_action_mask: bool = False, return_reward_mask: bool = False): + def tokenize_trajectories(self, tokenizer = None, return_reward_mask: bool = False): if tokenizer is None: tokenizer = self.tokenizer @@ -318,7 +320,7 @@ def rewards(self): def get_verl_data_proto(self): - inputs, other_info_list = self.tokenize_trajectories(return_action_mask=True, return_reward_mask=True) + inputs, other_info_list = self.tokenize_trajectories(return_reward_mask=True) group_ids = np.array([info["group_id"] for info in other_info_list], 
dtype=object) # Do evaluation here reward_values, other_values = self.rewards diff --git a/agents/agents/agents/auto.py b/agents/agents/agents/auto.py index 19d3e27..84050f4 100644 --- a/agents/agents/agents/auto.py +++ b/agents/agents/agents/auto.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional, Type, Union +from typing import Any, Callable, Dict, List, Optional, Type, Union from .specialized.think_agent import ThinkAgent from agents.agents.specialized.openai_agent import OpenAIAgent @@ -8,8 +8,7 @@ from .specialized.code_agent import CodeAgent from ..rewards.reward_base import get_reward_from_name -# Registry for agent types - will be populated dynamically -AGENT_MAPPING = {} + class AutoAgent: """ @@ -22,7 +21,7 @@ class AutoAgent: These agents are registered automatically. Additional custom agents can be registered using the register_agent method. """ - + AGENT_MAPPING = {} @classmethod def register_agent(cls, agent_type: str, agent_class: Type[BaseAgent]) -> None: """ @@ -32,7 +31,7 @@ def register_agent(cls, agent_type: str, agent_class: Type[BaseAgent]) -> None: agent_type: The name identifier for the agent type (e.g., 'react', 'code') agent_class: The agent class to instantiate for this type """ - AGENT_MAPPING[agent_type.lower()] = agent_class + cls.AGENT_MAPPING[agent_type.lower()] = agent_class @classmethod def _get_agent_class(cls, agent_type: str) -> Type[BaseAgent]: @@ -50,11 +49,11 @@ def _get_agent_class(cls, agent_type: str) -> Type[BaseAgent]: """ agent_type = agent_type.lower() - if agent_type not in AGENT_MAPPING: - available_types = list(AGENT_MAPPING.keys()) + if agent_type not in cls.AGENT_MAPPING: + available_types = list(cls.AGENT_MAPPING.keys()) raise ValueError(f"Unknown agent type: '{agent_type}'. 
Available types: {available_types}") - return AGENT_MAPPING[agent_type] + return cls.AGENT_MAPPING[agent_type] @classmethod def from_config(cls, config: Dict[str, Any]) -> BaseAgent: @@ -81,6 +80,14 @@ def from_config(cls, config: Dict[str, Any]) -> BaseAgent: An initialized agent instance. """ # Extract and validate required parameters + if config is None: + raise ValueError("Config could not be None") + + # construct a copy for agent_kwargs + agent_kwargs = {} + for k, v in config.items(): + agent_kwargs[k] = v + required_params = ["agent_type", "template", "tools", "backend"] missing_params = [param for param in required_params if not config.get(param)] @@ -88,20 +95,21 @@ def from_config(cls, config: Dict[str, Any]) -> BaseAgent: raise ValueError(f"Missing required parameters: {', '.join(missing_params)}") agent_type = config["agent_type"] + agent_kwargs.pop("agent_type") tools = get_tools_from_names(config["tools"]) agent_class = cls._get_agent_class(agent_type) + reward_name = config.get("reward_name") + if reward_name is not None: + reward_fn = get_reward_from_name(reward_name) + agent_kwargs.pop("reward_name") + else: + reward_fn = None - # construct a copy for agent_kwargs - agent_kwargs = {} - for k, v in config.items(): - agent_kwargs[k] = v - - agent_kwargs.pop("agent_type") agent_kwargs['tools'] = tools - if "reward_name" in config and config["reward_name"] is not None: - agent_kwargs.pop("reward_name") - reward_fn = get_reward_from_name(config["reward_name"]) - agent_kwargs["reward_fn"] = reward_fn + agent_kwargs['reward_fn'] = reward_fn + + if "use_agent" in agent_kwargs: + agent_kwargs.pop("use_agent") agent = agent_class(**agent_kwargs) @@ -114,11 +122,9 @@ def from_pretrained( agent_type: str, template: str, tools: Optional[List] = None, - vllm: bool = False, debug: bool = False, log_file: str = "agent", - wrapper: bool = False, - reward_name: Optional[str] = None, + reward_fn: Optional[Callable] = None, **kwargs ) -> BaseAgent: """ @@ -147,11 
+153,9 @@ def from_pretrained( "model_name_or_path": model_name_or_path, "template": template, "tools": tools or [], - "vllm": vllm, "debug": debug, "log_file": log_file, - "wrapper": wrapper, - "reward_name": reward_name, + "reward_fn": reward_fn, **kwargs } diff --git a/agents/agents/agents/llm_backend.py b/agents/agents/agents/llm_backend.py index 726620a..3a65f00 100644 --- a/agents/agents/agents/llm_backend.py +++ b/agents/agents/agents/llm_backend.py @@ -5,11 +5,11 @@ import asyncio from asyncore import loop from collections import deque +import copy from functools import partial import time from typing import Dict, Any, List, Optional, Callable, AsyncGenerator import uuid -from .templates.utils import convert_messages_to_openai_format import numpy as np from tenacity import retry, stop_after_attempt, wait_exponential import torch @@ -24,8 +24,8 @@ import logging import PIL + LOGGER = logging.getLogger(__name__) -LOGGER.setLevel(logging.DEBUG) try: from verl.protocol import DataProto @@ -353,6 +353,21 @@ def _process_inputs(self, prompts: List[str], vision_inputs: Dict[str, List[PIL. def generate(self, messages_list: str, **kwargs) -> str: raise NotImplementedError("Async Verl backend does not support sync generation") + + def _convert_to_openai_chat_without_tool_call_processing(self, messages: list) -> list: + """ + We use the pure generated content as the history. So we don't want any tool call to be part of the history. + This is used when models are not openai's official models like GPT-4o. 
+ """ + messages = copy.deepcopy(messages) + for message in messages: + if "tool_calls" in message: + del message["tool_calls"] + if "tool_call_id" in message: + del message["tool_call_id"] + if "tool_choice" in message: + del message["tool_choice"] + return messages async def generate_async(self, messages_list: str, **kwargs) -> str: """Generate text from prompt using Verl""" @@ -360,7 +375,7 @@ async def generate_async(self, messages_list: str, **kwargs) -> str: generation_config = {} tensors = torch.ones(len(messages_list), dtype=torch.int64) - messages_list = [convert_messages_to_openai_format(messages) for messages in messages_list] + messages_list = [self._convert_to_openai_chat_without_tool_call_processing(messages) for messages in messages_list] tools = kwargs.get("tools", None) tools_list = np.array([tools] * len(messages_list)) data = {"input_ids": tensors, "raw_prompt": np.array(messages_list), "tools": tools_list} @@ -457,6 +472,21 @@ async def _call(self, messages: List[List[Dict]], **kw) -> str: loop = asyncio.get_running_loop() return await loop.run_in_executor(None, partial(self._blocking_call, messages, **kw)) + def _convert_to_openai_chat_without_tool_call_processing(self, messages: list) -> list: + """ + We use the pure generated content as the history. So we don't want any tool call to be part of the history. + This is used when models are not openai's official models like GPT-4o. 
+ TODO: we need to add support for openai models + """ + messages = copy.deepcopy(messages) + for message in messages: + if "tool_calls" in message: + del message["tool_calls"] + if "tool_call_id" in message: + del message["tool_call_id"] + if "tool_choice" in message: + del message["tool_choice"] + return messages # Public API ‑‑ sync or async depending on caller's context def async_generate( @@ -478,7 +508,7 @@ def async_generate( else: messages_list = messages # batch print(f"[ClientBackend] messages_list: {messages_list}") - messages_list = [convert_messages_to_openai_format(messages) for messages in messages_list] + messages_list = [self._convert_to_openai_chat_without_tool_call_processing(messages) for messages in messages_list] async def _runner(): tasks = [asyncio.create_task(self._call(_input, **kwargs)) for _input in messages_list] diff --git a/agents/agents/agents/react/react_agent.py b/agents/agents/agents/react/react_agent.py index e176149..3c52228 100644 --- a/agents/agents/agents/react/react_agent.py +++ b/agents/agents/agents/react/react_agent.py @@ -123,7 +123,6 @@ def __init__(self, model_name_or_path=model_name_or_path, tools=tools, system_prompt=system_prompt, - max_length=8192, **kwargs ) diff --git a/agents/agents/agents/templates/utils.py b/agents/agents/agents/templates/utils.py index 9f62182..9e6b383 100644 --- a/agents/agents/agents/templates/utils.py +++ b/agents/agents/agents/templates/utils.py @@ -22,22 +22,6 @@ def strip_ansi(s: str) -> str: return ANSI_RE.sub('', s) -def convert_messages_to_openai_format(messages: list) -> list: - """ - Convert messages to OpenAI format. 
- TODO: add more processing for other types of content - """ - messages = copy.deepcopy(messages) - for message in messages: - # if "tool_calls" in message: - # del message["tool_calls"] - # if "tool_call_id" in message: - # del message["tool_call_id"] - if "tool_choice" in message: - del message["tool_choice"] - return messages - - def convert_messages_to_hf_format(messages: list) -> list: """ Convert messages to Hugging Face format. @@ -305,9 +289,7 @@ def compare_hf_template(tokenizer, template_name, messages=None, tools=None, add plain_highlighted_prompt = strip_ansi(highlighted_prompt) is_equal_between_implemented_prompts = implemented_prompt == plain_highlighted_prompt jinja_template = chat.template.jinja_template() - # Save jinja template to file - with open("jinja_template.jinja", "w") as f: - f.write(jinja_template) + tokenizer.chat_template = jinja_template implemented_jinja_prompt = tokenizer.apply_chat_template(messages, tokenize=False, tools=tools, add_generation_prompt=add_generation_prompt) is_equal_between_jinja_prompts = implemented_jinja_prompt == implemented_prompt diff --git a/agents/agents/agents/utils/tokenizer.py b/agents/agents/agents/utils/tokenizer.py index 00ab8fe..ed9f675 100644 --- a/agents/agents/agents/utils/tokenizer.py +++ b/agents/agents/agents/utils/tokenizer.py @@ -1,4 +1,4 @@ -from transformers import AutoTokenizer +from transformers import AutoProcessor, AutoTokenizer def create_tokenizer(model_name_or_path: str): try: @@ -8,3 +8,12 @@ def create_tokenizer(model_name_or_path: str): tokenizer = None return tokenizer + + +def create_processor(model_name_or_path: str): + try: + processor = AutoProcessor.from_pretrained(model_name_or_path) + except OSError: + processor = None + + return processor \ No newline at end of file diff --git a/agents/tests/unit/agents/mock_tests/__init__.py b/agents/tests/unit/agents/mock_tests/__init__.py new file mode 100644 index 0000000..e1d31e9 --- /dev/null +++ 
b/agents/tests/unit/agents/mock_tests/__init__.py @@ -0,0 +1 @@ +# Mock tests package for agents diff --git a/agents/tests/unit/agents/mock_tests/conftest.py b/agents/tests/unit/agents/mock_tests/conftest.py new file mode 100644 index 0000000..9b0433b --- /dev/null +++ b/agents/tests/unit/agents/mock_tests/conftest.py @@ -0,0 +1,155 @@ +import pytest +import os +from unittest.mock import Mock, patch, AsyncMock +from typing import Dict, Any, List + + +@pytest.fixture +def mock_llm_engine(): + """Mock LLM engine for testing""" + mock_engine = Mock() + mock_engine.generate_async = AsyncMock() + mock_engine.generate = Mock() + return mock_engine + + +@pytest.fixture +def mock_tokenizer(): + """Mock tokenizer for testing""" + mock_tok = Mock() + mock_tok.encode = Mock(return_value=[1, 2, 3, 4, 5]) + mock_tok.decode = Mock(return_value="Mocked decoded text") + mock_tok.pad_token_id = 0 + mock_tok.eos_token_id = 1 + return mock_tok + + +@pytest.fixture +def mock_processor(): + """Mock processor for testing""" + mock_proc = Mock() + mock_proc.encode = Mock(return_value={"input_ids": [1, 2, 3, 4, 5]}) + mock_proc.decode = Mock(return_value="Mocked processed text") + return mock_proc + + +@pytest.fixture +def mock_tools(): + """Mock tools for testing""" + mock_code_interpreter = Mock() + mock_code_interpreter.name = "code_interpreter" + mock_code_interpreter.description = "Run Python code" + mock_code_interpreter.schema = { + "name": "code_interpreter", + "description": "Run Python code", + "parameters": { + "type": "object", + "properties": { + "code": {"type": "string", "description": "Python code to execute"} + }, + "required": ["code"] + } + } + + mock_answer = Mock() + mock_answer.name = "answer" + mock_answer.description = "Provide final answer" + mock_answer.schema = { + "name": "answer", + "description": "Provide final answer", + "parameters": { + "type": "object", + "properties": { + "text": {"type": "string", "description": "The answer text"} + }, + "required": 
["text"] + } + } + + mock_google_search = Mock() + mock_google_search.name = "google_search" + mock_google_search.description = "Search the web" + mock_google_search.schema = { + "name": "google_search", + "description": "Search the web", + "parameters": { + "type": "object", + "properties": { + "query": {"type": "string", "description": "Search query"} + }, + "required": ["query"] + } + } + + return { + "code_interpreter": mock_code_interpreter, + "answer": mock_answer, + "google_search": mock_google_search + } + + +@pytest.fixture +def mock_responses(): + """Mock model responses for testing""" + return { + "code_agent": [ + "I'll solve this math problem step by step.\n```python\n# Calculate the speed\ns = 9 / 4 # 9 km in 4 hours\nprint(f'Speed: {s} km/h')\n```", + "Now let me calculate the time for s + 0.5 speed.\n```python\nnew_speed = s + 0.5\nnew_time = 9 / new_speed\nprint(f'New time: {new_time} hours')\n```", + "The walk takes 204 minutes including coffee shop time." + ], + "react_agent": [ + "Thought: I need to search for information about Python programming.\nAction: google_search\nInput: {\"query\": \"Python programming language features\"}", + "Thought: Based on the search results, I can now provide an answer.\nAction: answer\nInput: {\"text\": \"Python is a high-level programming language known for its simplicity and readability.\"}" + ], + "think_agent": [ + "Let me think about this step by step.\n\nFirst, I need to understand the problem...\n\nBased on my reasoning, the answer is 42." 
+ ] + } + + +@pytest.fixture +def test_config(): + """Provide test configuration based on environment""" + if os.environ.get('CI'): + return { + "backend": "client", + "model": "microsoft/DialoGPT-small", # Smaller CPU-compatible model + "max_steps": 2, + "num_chains": 2, + "use_mock": True + } + else: + return { + "backend": "async_vllm", + "model": "Qwen/Qwen2.5-3B-Instruct", + "max_steps": 4, + "num_chains": 5, + "use_mock": False + } + + +@pytest.fixture +def mock_chain_generation(): + """Mock chain generation methods""" + with patch('agents.agents.agent_base.ChainGeneration.run_async') as mock_run, \ + patch('agents.agents.agent_base.ChainGeneration.get_messages') as mock_get_messages, \ + patch('agents.agents.agent_base.ChainGeneration.tokenize_trajectories') as mock_tokenize: + + mock_run.return_value = None + mock_get_messages.return_value = [{"role": "assistant", "content": "Mocked response"}] + mock_tokenize.return_value = {"input_ids": [[1, 2, 3, 4, 5]], "attention_mask": [[1, 1, 1, 1, 1]]} + + yield { + "run_async": mock_run, + "get_messages": mock_get_messages, + "tokenize_trajectories": mock_tokenize + } + + +@pytest.fixture +def mock_reward_function(): + """Mock reward function for testing""" + mock_reward = Mock() + mock_reward.__call__ = Mock(return_value=0.85) + mock_reward.name = "mock_reward" + return mock_reward diff --git a/agents/tests/unit/agents/mock_tests/test_mock_agent_integration.py b/agents/tests/unit/agents/mock_tests/test_mock_agent_integration.py new file mode 100644 index 0000000..e61a9d1 --- /dev/null +++ b/agents/tests/unit/agents/mock_tests/test_mock_agent_integration.py @@ -0,0 +1,324 @@ +import pytest +from unittest.mock import Mock, patch, AsyncMock +from agents.agents.auto import AutoAgent +from agents.agents.react.react_agent import ReactAgent +from agents.agents.specialized.code_agent import CodeAgent + + +class TestMockAgentIntegration: + """Integration tests for multiple agents working together with mocked 
dependencies""" + + def test_agent_workflow_code_to_react(self, mock_tools, mock_chain_generation): + """Test workflow where CodeAgent generates code that ReactAgent uses""" + # Create CodeAgent + code_tools = [mock_tools["code_interpreter"]] + code_agent = CodeAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=code_tools, + template="qwen-7b-chat", + backend="client" + ) + + # Create ReactAgent + react_tools = [mock_tools["google_search"], mock_tools["answer"]] + react_agent = ReactAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=react_tools, + template="qwen2.5", + task_info="Use code execution results to provide answers", + backend="client" + ) + + # Test that both agents can be created and configured + assert isinstance(code_agent, CodeAgent) + assert isinstance(react_agent, ReactAgent) + assert len(code_agent.tools) == 1 + assert len(react_agent.tools) == 2 + + # Test that both agents have the expected methods + assert hasattr(code_agent, 'parse') + assert hasattr(react_agent, 'parse') + assert hasattr(code_agent, 'run_async') + assert hasattr(react_agent, 'run_async') + + def test_agent_workflow_react_to_code(self, mock_tools, mock_chain_generation): + """Test workflow where ReactAgent decides to use CodeAgent""" + # Create ReactAgent with code execution capability + react_tools = [mock_tools["google_search"], mock_tools["code_interpreter"], mock_tools["answer"]] + react_agent = ReactAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=react_tools, + template="qwen2.5", + task_info="Search for information and execute code when needed", + backend="client" + ) + + # Test that ReactAgent can handle code execution tools + assert len(react_agent.tools) == 3 + tool_names = [tool.name for tool in react_agent.tools] + assert "google_search" in tool_names + assert "code_interpreter" in tool_names + assert "answer" in tool_names + + # Test system prompt includes code execution + assert "code_interpreter" in react_agent.system_prompt + + def test_auto_agent_workflow(self, mock_tools, 
mock_chain_generation): + """Test AutoAgent creating different agent types in sequence""" + # Create ReactAgent via AutoAgent + react_config = { + "agent_type": "react", + "model_name_or_path": "Qwen/Qwen2.5-3B-Instruct", + "template": "qwen-7b-chat", + "tools": [mock_tools["google_search"], mock_tools["answer"]], + "backend": "client" + } + + react_agent = AutoAgent.from_config(react_config) + assert isinstance(react_agent, ReactAgent) + + # Create CodeAgent via AutoAgent + code_config = { + "agent_type": "code", + "model_name_or_path": "Qwen/Qwen2.5-3B-Instruct", + "template": "qwen-7b-chat", + "tools": [mock_tools["code_interpreter"]], + "backend": "client" + } + + code_agent = AutoAgent.from_config(code_config) + assert isinstance(code_agent, CodeAgent) + + # Test that both agents work independently + assert react_agent.agent_type != code_agent.agent_type + assert len(react_agent.tools) != len(code_agent.tools) + + def test_agent_tool_sharing(self, mock_tools, mock_chain_generation): + """Test that agents can share common tools""" + # Create agents with overlapping tools + code_agent = CodeAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=[mock_tools["code_interpreter"]], + template="qwen-7b-chat", + backend="client" + ) + + react_agent = ReactAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=[mock_tools["code_interpreter"], mock_tools["answer"]], + template="qwen2.5", + backend="client" + ) + + # Test that both agents can use the shared tool + assert code_agent.tools[0].name == "code_interpreter" + assert react_agent.tools[0].name == "code_interpreter" + + # Test that the tool has the same schema in both agents + assert code_agent.tools[0].schema == react_agent.tools[0].schema + + def test_agent_response_parsing_integration(self, mock_tools, mock_chain_generation): + """Test that different agents can parse each other's response formats""" + # Create a response that could come from either agent + mixed_response = """Thought: I need to calculate something. 
+Action: code_interpreter +Input: {"code": "print(2 + 2)"}""" + + # Test ReactAgent parsing this response + react_tools = [mock_tools["code_interpreter"], mock_tools["answer"]] + react_agent = ReactAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=react_tools, + template="qwen2.5", + backend="client" + ) + + react_result = react_agent.parse([mixed_response], react_tools) + assert len(react_result) == 1 + assert react_result[0]["role"] == "assistant" + + # Test CodeAgent parsing a code-focused response + code_response = "I'll solve this step by step.\n```python\nx = 2 + 2\nprint(x)\n```" + code_tools = [mock_tools["code_interpreter"]] + code_agent = CodeAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=code_tools, + template="qwen-7b-chat", + backend="client" + ) + + code_result = code_agent.parse([code_response], code_tools) + assert len(code_result) == 1 + assert code_result[0]["role"] == "assistant" + + def test_agent_backend_compatibility(self, mock_tools, mock_chain_generation): + """Test that agents work with different backends""" + backends = ["client", "transformers"] + + for backend in backends: + # Test ReactAgent with different backends + react_agent = ReactAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=[mock_tools["google_search"]], + template="qwen2.5", + backend=backend + ) + assert react_agent.backend == backend + + # Test CodeAgent with different backends + code_agent = CodeAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=[mock_tools["code_interpreter"]], + template="qwen-7b-chat", + backend=backend + ) + assert code_agent.backend == backend + + def test_agent_error_handling_integration(self, mock_tools, mock_chain_generation): + """Test error handling across different agent types""" + # Test ReactAgent with malformed input + react_tools = [mock_tools["google_search"]] + react_agent = ReactAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=react_tools, + template="qwen2.5", + backend="client" + ) + + malformed_response = "Thought: I need to search.\nAction: 
google_search\nInput: {invalid json" + react_result = react_agent.parse([malformed_response], react_tools) + assert len(react_result) == 1 + assert len(react_result[0]["tool_calls"]) == 0 + + # Test CodeAgent with malformed input + code_tools = [mock_tools["code_interpreter"]] + code_agent = CodeAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=code_tools, + template="qwen-7b-chat", + backend="client" + ) + + malformed_code_response = "```python\nx = 1\n" # Missing closing ``` + code_result = code_agent.parse([malformed_code_response], code_tools) + assert len(code_result) == 1 + assert len(code_result[0]["tool_calls"]) == 0 + + def test_agent_template_compatibility(self, mock_tools, mock_chain_generation): + """Test that agents work with different templates""" + templates = ["qwen-7b-chat", "qwen2.5", "qwen2.5-no-tool"] + + for template in templates: + # Test ReactAgent with different templates + react_agent = ReactAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=[mock_tools["google_search"]], + template=template, + backend="client" + ) + assert react_agent.template == template + + # Test CodeAgent with different templates + code_agent = CodeAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=[mock_tools["code_interpreter"]], + template=template, + backend="client" + ) + assert code_agent.template == template + + def test_agent_async_operations_integration(self, mock_tools, mock_llm_engine): + """Test async operations across different agent types""" + # Mock LLM engine for both agents + with patch('agents.agents.agent_base.BaseAgent._setup_backend') as mock_setup: + mock_setup.return_value = None + + # Test ReactAgent async operations + react_tools = [mock_tools["google_search"]] + react_agent = ReactAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=react_tools, + template="qwen2.5", + backend="client" + ) + react_agent.llm_engine = mock_llm_engine + + # Test CodeAgent async operations + code_tools = [mock_tools["code_interpreter"]] + code_agent = CodeAgent( + 
"Qwen/Qwen2.5-3B-Instruct", + tools=code_tools, + template="qwen-7b-chat", + backend="client" + ) + code_agent.llm_engine = mock_llm_engine + + # Verify both agents can use the same LLM engine + assert react_agent.llm_engine is mock_llm_engine + assert code_agent.llm_engine is mock_llm_engine + + def test_agent_system_prompt_integration(self, mock_tools, mock_chain_generation): + """Test that system prompts are properly integrated across agents""" + # Test ReactAgent system prompt + react_tools = [mock_tools["google_search"], mock_tools["answer"]] + react_agent = ReactAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=react_tools, + template="qwen2.5", + task_info="Test task for integration", + backend="client" + ) + + # Test CodeAgent system prompt + code_tools = [mock_tools["code_interpreter"]] + code_agent = CodeAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=code_tools, + template="qwen-7b-chat", + backend="client" + ) + + # Verify both agents have appropriate system prompts + assert "ReAct-style agent" in react_agent.system_prompt + assert "multi-turn manner" in code_agent.system_prompt + assert "Test task for integration" in react_agent.system_prompt + + # Verify tool information is included in system prompts + for tool in react_agent.tools: + assert tool.name in react_agent.system_prompt + + for tool in code_agent.tools: + assert tool.name in code_agent.system_prompt + + def test_agent_chain_generation_integration(self, mock_tools, mock_chain_generation): + """Test that chain generation methods work across different agent types""" + # Test ReactAgent chain generation + react_tools = [mock_tools["google_search"]] + react_agent = ReactAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=react_tools, + template="qwen2.5", + backend="client" + ) + + # Test CodeAgent chain generation + code_tools = [mock_tools["code_interpreter"]] + code_agent = CodeAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=code_tools, + template="qwen-7b-chat", + backend="client" + ) + + # Verify both agents 
have chain generation methods + for agent in [react_agent, code_agent]: + assert hasattr(agent, 'run_async') + assert hasattr(agent, 'get_messages') + assert hasattr(agent, 'tokenize_trajectories') + + # Test that methods can be called (they're mocked) + messages = agent.get_messages() + assert isinstance(messages, list) + + trajectories = agent.tokenize_trajectories() + assert isinstance(trajectories, dict) diff --git a/agents/tests/unit/agents/mock_tests/test_mock_auto_agent.py b/agents/tests/unit/agents/mock_tests/test_mock_auto_agent.py new file mode 100644 index 0000000..365f795 --- /dev/null +++ b/agents/tests/unit/agents/mock_tests/test_mock_auto_agent.py @@ -0,0 +1,294 @@ +import pytest +from unittest.mock import Mock, patch, AsyncMock +from agents.agents.auto import AutoAgent +from agents.agents.react.react_agent import ReactAgent +from agents.agents.specialized.code_agent import CodeAgent +from agents.rewards import qa_f1_reward + +def test_auto_agent_registration(): + """Test agent registration functionality""" + # Test that built-in agents are registered + assert "react" in AutoAgent.AGENT_MAPPING + assert "code" in AutoAgent.AGENT_MAPPING + + # Test custom agent registration + class CustomAgent: + pass + + AutoAgent.register_agent("custom", CustomAgent) + assert "custom" in AutoAgent.AGENT_MAPPING + assert AutoAgent.AGENT_MAPPING["custom"] == CustomAgent + + +def test_auto_agent_from_config_react(): + """Test creating ReactAgent from config""" + config = { + "agent_type": "react", + "model_name_or_path": "Qwen/Qwen2.5-3B-Instruct", + "template": "qwen2.5", + "tools": ["google_search", "answer"], + "backend": "client" + } + + agent = AutoAgent.from_config(config) + + assert isinstance(agent, ReactAgent) + assert agent.model_name_or_path == "Qwen/Qwen2.5-3B-Instruct" + assert agent.template == "qwen2.5" + assert len(agent.tools) == 2 + assert agent.backend == "client" + +def test_auto_agent_from_config_code(): + """Test creating CodeAgent from config""" 
+ config = { + "agent_type": "code", + "model_name_or_path": "Qwen/Qwen2.5-3B-Instruct", + "template": "qwen2.5", + "tools": ["code_interpreter"], + "backend": "client" + } + + agent = AutoAgent.from_config(config) + + assert isinstance(agent, CodeAgent) + assert agent.model_name_or_path == "Qwen/Qwen2.5-3B-Instruct" + assert len(agent.tools) == 1 + assert agent.backend == "client" + +def test_auto_agent_from_config_with_reward(): + """Test creating agent with reward function""" + config = { + "agent_type": "react", + "model_name_or_path": "Qwen/Qwen2.5-3B-Instruct", + "template": "qwen2.5", + "tools": ["google_search"], + "reward_fn": qa_f1_reward, + "backend": "client" + } + + agent = AutoAgent.from_config(config) + + assert isinstance(agent, ReactAgent) + +def test_auto_agent_from_pretrained(): + """Test creating agent using from_pretrained method""" + agent = AutoAgent.from_pretrained( + model_name_or_path="Qwen/Qwen2.5-3B-Instruct", + agent_type="react", + template="qwen2.5", + tools=["google_search", "answer"], + debug=True, + backend="client" + ) + + assert isinstance(agent, ReactAgent) + assert agent.model_name_or_path == "Qwen/Qwen2.5-3B-Instruct" + assert agent.template == "qwen2.5" + assert agent.backend == "client" + +def test_auto_agent_from_config_missing_params(): + """Test config validation with missing parameters""" + # Missing agent_type + config1 = { + "model_name_or_path": "Qwen/Qwen2.5-3B-Instruct", + "template": "qwen2.5", + "tools": ["google_search", "answer"], + "backend": "client" + } + + with pytest.raises(ValueError, match="Missing required parameter"): + AutoAgent.from_config(config1) + + # Missing template + config2 = { + "agent_type": "react", + "model_name_or_path": "Qwen/Qwen2.5-3B-Instruct", + "tools": ["google_search", "answer"], + "backend": "client" + } + + with pytest.raises(ValueError, match="Missing required parameter"): + AutoAgent.from_config(config2) + + # Missing tools + config3 = { + "agent_type": "react", + 
"model_name_or_path": "Qwen/Qwen2.5-3B-Instruct", + "template": "qwen2.5", + "backend": "client" + } + + with pytest.raises(ValueError, match="Missing required parameter"): + AutoAgent.from_config(config3) + + # Missing backend + config4 = { + "agent_type": "react", + "model_name_or_path": "Qwen/Qwen2.5-3B-Instruct", + "template": "qwen2.5", + "tools": ["google_search", "answer"] + } + + with pytest.raises(ValueError, match="Missing required parameter"): + AutoAgent.from_config(config4) + +def test_auto_agent_from_config_invalid_type(): + """Test config validation with invalid agent type""" + config = { + "agent_type": "invalid_type", + "model_name_or_path": "Qwen/Qwen2.5-3B-Instruct", + "template": "qwen2.5", + "tools": ["google_search", "answer"], + "backend": "client" + } + + with pytest.raises(ValueError, match="Unknown agent type"): + AutoAgent.from_config(config) + +def test_auto_agent_tool_loading(): + """Test that tools are properly loaded from names""" + config = { + "agent_type": "react", + "model_name_or_path": "Qwen/Qwen2.5-3B-Instruct", + "template": "qwen2.5", + "tools": ["google_search", "answer"], + "backend": "client" + } + + agent = AutoAgent.from_config(config) + assert len(agent.tools) == 2 + assert agent.tools[0].name == "google_search" + assert agent.tools[1].name == "answer" + + +def test_auto_agent_debug_mode(): + """Test debug mode configuration""" + config = { + "agent_type": "react", + "model_name_or_path": "Qwen/Qwen2.5-3B-Instruct", + "template": "qwen2.5", + "tools": ["google_search"], + "backend": "client", + "debug": True + } + + agent = AutoAgent.from_config(config) + assert agent.debug is True + +def test_auto_agent_log_file_configuration(): + """Test log file configuration""" + config = { + "agent_type": "react", + "model_name_or_path": "Qwen/Qwen2.5-3B-Instruct", + "template": "qwen2.5", + "tools": ["google_search"], + "backend": "client", + "log_file": "test_agent" + } + + agent = AutoAgent.from_config(config) + assert 
hasattr(agent, 'logger') + +def test_auto_agent_max_length_configuration(): + """Test max length configuration""" + config = { + "agent_type": "react", + "model_name_or_path": "Qwen/Qwen2.5-3B-Instruct", + "template": "qwen2.5", + "tools": ["google_search"], + "backend": "client", + "max_length": 4096 + } + + agent = AutoAgent.from_config(config) + assert agent.max_length == 4096 + +def test_auto_agent_task_info_configuration(): + """Test task info configuration for ReactAgent""" + task_info = "Use web search to find information and provide answers" + config = { + "agent_type": "react", + "model_name_or_path": "Qwen/Qwen2.5-3B-Instruct", + "template": "qwen2.5", + "tools": ["google_search", "answer"], + "backend": "client", + "task_info": task_info + } + + agent = AutoAgent.from_config(config) + assert isinstance(agent, ReactAgent) + assert task_info in agent.system_prompt + +def test_auto_agent_custom_agent_registration(): + """Test custom agent registration and usage""" + class CustomTestAgent: + def __init__(self, **kwargs): + self.config = kwargs + + # Register custom agent + AutoAgent.register_agent("custom_test", CustomTestAgent) + + # Test that it's registered + assert "custom_test" in AutoAgent.AGENT_MAPPING + + # Test creating custom agent + config = { + "agent_type": "custom_test", + "model_name_or_path": "test-model", + "template": "test-template", + "tools": ["answer"], + "backend": "client" + } + + agent = AutoAgent.from_config(config) + assert isinstance(agent, CustomTestAgent) + +def test_auto_agent_error_handling(): + """Test error handling in agent creation""" + # Test with completely invalid config + with pytest.raises(ValueError): + AutoAgent.from_config({}) + + # Test with None config + with pytest.raises(ValueError): + AutoAgent.from_config(None) + +def test_auto_agent_environment_specific_config(test_config): + """Test environment-specific configuration""" + if test_config["use_mock"]: + # CI environment - use smaller model and fewer steps + 
config = { + "agent_type": "react", + "model_name_or_path": test_config["model"], + "template": "qwen2.5", + "tools": ["google_search"], + "backend": test_config["backend"] + } + + agent = AutoAgent.from_config(config) + assert agent.backend == test_config["backend"] + assert agent.model_name_or_path == test_config["model"] + +def test_auto_agent_tool_validation(): + """Test that tools are properly validated and stored""" + config = { + "agent_type": "react", + "model_name_or_path": "Qwen/Qwen2.5-3B-Instruct", + "template": "qwen2.5", + "tools": ["google_search", "answer"], + "backend": "client" + } + + agent = AutoAgent.from_config(config) + + # Verify tools are properly stored + assert len(agent.tools) == 2 + tool_names = [tool.name for tool in agent.tools] + assert "google_search" in tool_names + assert "answer" in tool_names + + # Verify tool schemas + for tool in agent.tools: + assert hasattr(tool, 'name') + assert hasattr(tool, 'description') + assert hasattr(tool, 'schema') diff --git a/agents/tests/unit/agents/mock_tests/test_mock_code_agent.py b/agents/tests/unit/agents/mock_tests/test_mock_code_agent.py new file mode 100644 index 0000000..69bf824 --- /dev/null +++ b/agents/tests/unit/agents/mock_tests/test_mock_code_agent.py @@ -0,0 +1,251 @@ +import pytest +from unittest.mock import Mock, patch, AsyncMock +from agents.agents.specialized.code_agent import CodeAgent, extract_python_code_markdown, CodeAgentSystemPrompt + +def test_code_agent_initialization(): + """Test CodeAgent initialization without GPU dependencies""" + tools = ["code_interpreter"] + + agent = CodeAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=tools, + template="qwen2.5", + backend="client", # Use client backend for CI + debug=True + ) + + # Test basic initialization + assert agent is not None + assert agent.model_name_or_path == "Qwen/Qwen2.5-3B-Instruct" + assert agent.template == "qwen2.5" + assert agent.backend == "client" + assert len(agent.tools) == 1 + assert agent.max_length == 
8192 + + # Test system prompt + assert "multi-turn manner" in agent.system_prompt + assert "python code" in agent.system_prompt.lower() + assert "code interpreter" in agent.system_prompt.lower() + + +def test_extract_python_code_markdown(): + """Test Python code extraction from markdown""" + # Test single code block + text1 = "Here's some code:\n```python\nprint('Hello')\n```\nThat's it." + result1 = extract_python_code_markdown(text1) + assert len(result1) == 1 + assert "print('Hello')" in result1[0] + + # Test multiple code blocks + text2 = "First:\n```python\nx = 1\n```\nSecond:\n```python\ny = 2\n```" + result2 = extract_python_code_markdown(text2) + assert len(result2) == 2 + assert "x = 1" in result2[0] + assert "y = 2" in result2[1] + + # Test no code blocks + text3 = "Just regular text with no code." + result3 = extract_python_code_markdown(text3) + assert len(result3) == 0 + + # Test code block with different spacing + text4 = "```python\n x = 42 \n```" + result4 = extract_python_code_markdown(text4) + assert len(result4) == 1 + assert "x = 42" in result4[0] + +def test_code_agent_parse_single_code_block(mock_tools): + """Test parsing responses with single code blocks""" + tools = [mock_tools["code_interpreter"]] + agent = CodeAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=tools, + template="qwen2.5", + backend="client" + ) + + responses = [ + "I'll solve this step by step.\n```python\nx = 9 / 4\nprint(f'Speed: {x} km/h')\n```" + ] + + result = agent.parse(responses, tools) + + assert len(result) == 1 + assert result[0]["role"] == "assistant" + assert "I'll solve this step by step" in result[0]["content"][0]["text"] + assert len(result[0]["tool_calls"]) == 1 + assert result[0]["tool_calls"][0]["function"]["name"] == "code_interpreter" + assert "x = 9 / 4" in result[0]["tool_calls"][0]["function"]["arguments"] + assert result[0]["status"] == "continue" + assert result[0]["loss"] is True + +def test_code_agent_parse_no_code_block(mock_tools): + 
"""Test parsing responses with no code blocks""" + tools = [mock_tools["code_interpreter"]] + agent = CodeAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=tools, + template="qwen2.5", + backend="client" + ) + + responses = [ + "I'll solve this problem step by step." + ] + + result = agent.parse(responses, tools) + + assert len(result) == 1 + assert result[0]["role"] == "assistant" + assert "I'll solve this problem step by step" in result[0]["content"][0]["text"] + assert len(result[0]["tool_calls"]) == 0 + assert result[0]["status"] == "terminal" + assert result[0]["loss"] is True + + +def test_code_agent_parse_multiple_code_blocks(self): + """Test parsing responses with multiple code blocks (should fail)""" + tools = ["code_interpreter"] + agent = CodeAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=tools, + template="qwen2.5", + backend="client" + ) + + responses = [ + "Here's the first step:\n```python\nx = 1\n```\nAnd the second:\n```python\ny = 2\n```" + ] + + result = agent.parse(responses, tools) + + assert len(result) == 1 + assert result[0]["role"] == "assistant" + assert len(result[0]["tool_calls"]) == 0 + assert result[0]["status"] == "terminal" + +def test_code_agent_parse_final_answer(): + """Test parsing responses with final answer""" + tools = ["code_interpreter"] + agent = CodeAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=tools, + template="qwen2.5", + backend="client" + ) + + responses = [ + "The final answer is 204 minutes" + ] + + result = agent.parse(responses, tools) + + assert len(result) == 1 + assert result[0]["role"] == "assistant" + assert "204 minutes" in result[0]["content"][0]["text"] + assert len(result[0]["tool_calls"]) == 0 + assert result[0]["status"] == "terminal" + +def test_code_agent_with_mock_llm_engine(mock_llm_engine): + """Test CodeAgent with mocked LLM engine""" + tools = ["code_interpreter"] + + with patch('agents.agents.agent_base.BaseAgent._setup_backend') as mock_setup: + mock_setup.return_value = None + + agent = CodeAgent( + 
"Qwen/Qwen2.5-3B-Instruct", + tools=tools, + template="qwen2.5", + backend="client" + ) + + # Mock the LLM engine + agent.llm_engine = mock_llm_engine + + # Test that the agent can be created and configured + assert agent.llm_engine is not None + assert hasattr(agent.llm_engine, 'generate_async') + +def test_code_agent_tool_schema_validation(): + """Test that CodeAgent properly handles tool schemas""" + tools = ["code_interpreter"] + agent = CodeAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=tools, + template="qwen2.5", + backend="client" + ) + + # Verify tool is properly stored + assert len(agent.tools) == 1 + assert agent.tools[0].name == "code_interpreter" + assert "Run Python code" in agent.tools[0].description + +def test_code_agent_error_handling(): + """Test CodeAgent error handling in parsing""" + tools = ["code_interpreter"] + agent = CodeAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=tools, + template="qwen2.5", + backend="client" + ) + + # Test with malformed response + malformed_responses = [ + "```python\nx = 1\n" # Missing closing ``` + ] + + result = agent.parse(malformed_responses, tools) + + assert len(result) == 1 + assert result[0]["role"] == "assistant" + assert len(result[0]["tool_calls"]) == 0 + assert result[0]["status"] == "terminal" + +def test_code_agent_chain_generation_integration(mock_chain_generation): + """Test CodeAgent integration with chain generation methods""" + tools = ["code_interpreter"] + + with patch('agents.agents.agent_base.BaseAgent._setup_backend') as mock_setup: + mock_setup.return_value = None + + agent = CodeAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=tools, + template="qwen2.5", + backend="client" + ) + + # Test that chain generation methods are available + assert hasattr(agent, 'run_async') + assert hasattr(agent, 'get_messages') + assert hasattr(agent, 'tokenize_trajectories') + +@pytest.mark.asyncio +async def test_code_agent_async_operations(mock_llm_engine): + """Test CodeAgent async operations with mocked 
dependencies""" + tools = ["code_interpreter"] + + with patch('agents.agents.agent_base.BaseAgent._setup_backend') as mock_setup: + mock_setup.return_value = None + + agent = CodeAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=tools, + template="qwen2.5", + backend="client" + ) + + agent.llm_engine = mock_llm_engine + + # Mock the generate_async method + mock_llm_engine.generate_async.return_value = [ + "I'll solve this step by step.\n```python\nx = 9 / 4\nprint(f'Speed: {x} km/h')\n```" + ] + + # Test that async generation can be called + result = await agent.llm_engine.generate_async(["test"]) + assert len(result) == 1 + assert "```python" in result[0] diff --git a/agents/tests/unit/agents/mock_tests/test_mock_react_agent.py b/agents/tests/unit/agents/mock_tests/test_mock_react_agent.py new file mode 100644 index 0000000..84baf3a --- /dev/null +++ b/agents/tests/unit/agents/mock_tests/test_mock_react_agent.py @@ -0,0 +1,352 @@ +import pytest +from unittest.mock import Mock, patch, AsyncMock +from agents.agents.react.react_agent import ReactAgent, parse_react_step, extract_tool_calls, ReactSystemPromptTemplate + + +class TestMockReactAgent: + """Test ReactAgent with mocked dependencies for CI environments""" + + def test_react_agent_initialization(self, mock_tools): + """Test ReactAgent initialization without GPU dependencies""" + tools = [mock_tools["google_search"], mock_tools["answer"]] + task_info = "Test search task" + + agent = ReactAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=tools, + template="qwen2.5", + task_info=task_info, + backend="client" # Use client backend for CI + ) + + # Test basic initialization + assert agent is not None + assert agent.model_name_or_path == "Qwen/Qwen2.5-3B-Instruct" + assert agent.template == "qwen2.5" + assert agent.backend == "client" + assert len(agent.tools) == 2 + assert agent.max_length == 8192 + + # Test system prompt contains task info and tools + assert task_info in agent.system_prompt + assert "google_search" in 
agent.system_prompt + assert "answer" in agent.system_prompt + assert "ReAct-style agent" in agent.system_prompt + + def test_react_agent_system_prompt_formatting(self, mock_tools): + """Test that ReactAgent system prompt is correctly formatted""" + tools = [mock_tools["google_search"], mock_tools["answer"]] + task_info = "Search for information and provide answers" + + agent = ReactAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=tools, + template="qwen2.5", + task_info=task_info, + backend="client" + ) + + # Check system prompt structure + assert "Think→Act→Observe" in agent.system_prompt + assert "Thought:" in agent.system_prompt + assert "Action:" in agent.system_prompt + assert "Input:" in agent.system_prompt + assert "Answer:" in agent.system_prompt + assert task_info in agent.system_prompt + + # Check tool schemas are included + assert "google_search" in agent.system_prompt + assert "answer" in agent.system_prompt + + def test_react_agent_no_task_info(self, mock_tools): + """Test ReactAgent initialization without task info""" + tools = [mock_tools["google_search"]] + + agent = ReactAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=tools, + template="qwen2.5", + backend="client" + ) + + # Should still have basic system prompt + assert "ReAct-style agent" in agent.system_prompt + assert len(agent.tools) == 1 + + def test_parse_react_step_complete(self): + """Test parsing complete ReAct step""" + text = """Thought: I need to find information about Python. +Action: google_search +Input: {"query": "Python programming language"}""" + + result = parse_react_step(text) + + assert result["thought"] == "I need to find information about Python." + assert result["action"] == "google_search" + assert result["input"] == '{"query": "Python programming language"}' + + def test_parse_react_step_missing_components(self): + """Test parsing ReAct step with missing components""" + text = "Thought: I'm thinking about something." 
+ result = parse_react_step(text) + + assert result["thought"] == "I'm thinking about something." + assert result["action"] is None + assert result["input"] is None + + def test_parse_react_step_action_only(self): + """Test parsing ReAct step with only action""" + text = "Action: search\nInput: {\"query\": \"test\"}" + result = parse_react_step(text) + + assert result["thought"] is None + assert result["action"] == "search" + assert result["input"] == '{"query": "test"}' + + def test_parse_react_step_case_insensitive(self): + """Test parsing ReAct step with different case""" + text = "THOUGHT: I need to think.\nACTION: search\nINPUT: {\"query\": \"test\"}" + result = parse_react_step(text) + + assert result["thought"] == "I need to think." + assert result["action"] == "search" + assert result["input"] == '{"query": "test"}' + + def test_parse_react_step_multiline_thought(self): + """Test parsing ReAct step with multiline thought""" + text = """Thought: I need to think about this +step by step. First, I should consider +the user's request carefully. 
+Action: search +Input: {"query": "multiline test"}""" + + result = parse_react_step(text) + + assert "step by step" in result["thought"] + assert "First, I should consider" in result["thought"] + assert result["action"] == "search" + assert result["input"] == '{"query": "multiline test"}' + + def test_extract_tool_calls_valid_json(self): + """Test extracting tool calls from valid JSON input""" + action_input = '{"name": "google_search", "arguments": {"query": "test"}}' + result = extract_tool_calls(action_input) + + assert len(result) == 1 + assert result[0]["name"] == "google_search" + assert result[0]["arguments"] == {"query": "test"} + + def test_extract_tool_calls_invalid_json(self): + """Test extracting tool calls from invalid JSON input""" + action_input = '{"name": "google_search", "arguments": {"query": "test"}' # Missing } + result = extract_tool_calls(action_input) + + assert len(result) == 0 + + def test_extract_tool_calls_none_input(self): + """Test extracting tool calls from None input""" + result = extract_tool_calls(None) + assert len(result) == 0 + + def test_extract_tool_calls_empty_string(self): + """Test extracting tool calls from empty string""" + result = extract_tool_calls("") + assert len(result) == 0 + + def test_react_agent_parse_single_tool_call(self, mock_tools): + """Test parsing responses with single tool call""" + tools = [mock_tools["google_search"], mock_tools["answer"]] + agent = ReactAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=tools, + template="qwen2.5", + backend="client" + ) + + responses = ["""Thought: I need to search for information. +Action: google_search +Input: {"query": "test query"}"""] + + result = agent.parse(responses, tools) + + assert len(result) == 1 + assert result[0]["role"] == "assistant" + assert "Thought: I need to search for information." 
in result[0]["content"][0]["text"] + assert len(result[0]["tool_calls"]) == 1 + assert result[0]["tool_calls"][0]["function"]["name"] == "google_search" + assert result[0]["tool_calls"][0]["function"]["arguments"] == {"query": "test query"} + assert result[0]["loss"] is True + + def test_react_agent_parse_no_tool_call(self, mock_tools): + """Test parsing responses with no tool call""" + tools = [mock_tools["google_search"], mock_tools["answer"]] + agent = ReactAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=tools, + template="qwen2.5", + backend="client" + ) + + responses = ["Thought: I'm thinking about this problem."] + + result = agent.parse(responses, tools) + + assert len(result) == 1 + assert result[0]["role"] == "assistant" + assert "Thought: I'm thinking about this problem." in result[0]["content"][0]["text"] + assert len(result[0]["tool_calls"]) == 0 + assert result[0]["loss"] is True + + def test_react_agent_parse_final_answer(self, mock_tools): + """Test parsing responses with final answer""" + tools = [mock_tools["google_search"], mock_tools["answer"]] + agent = ReactAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=tools, + template="qwen2.5", + backend="client" + ) + + responses = ["""Thought: I have enough information now. +Action: answer +Input: {"text": "The answer is 42."}"""] + + result = agent.parse(responses, tools) + + assert len(result) == 1 + assert result[0]["role"] == "assistant" + assert "The answer is 42." in str(result[0]["tool_calls"][0]["function"]["arguments"]) + assert result[0]["tool_calls"][0]["function"]["name"] == "answer" + + def test_react_agent_parse_multiple_responses(self, mock_tools): + """Test parsing multiple responses""" + tools = [mock_tools["google_search"], mock_tools["answer"]] + agent = ReactAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=tools, + template="qwen2.5", + backend="client" + ) + + responses = [ + """Thought: I need to search for information. 
+Action: google_search +Input: {"query": "first query"}""", + """Thought: Now I can provide an answer. +Action: answer +Input: {"text": "Final answer"}""" + ] + + result = agent.parse(responses, tools) + + assert len(result) == 2 + assert result[0]["tool_calls"][0]["function"]["name"] == "google_search" + assert result[1]["tool_calls"][0]["function"]["name"] == "answer" + + def test_react_agent_tool_schema_validation(self, mock_tools): + """Test that ReactAgent properly handles tool schemas""" + tools = [mock_tools["google_search"], mock_tools["answer"]] + agent = ReactAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=tools, + template="qwen2.5", + backend="client" + ) + + # Verify tools are properly stored + assert len(agent.tools) == 2 + tool_names = [tool.name for tool in agent.tools] + assert "google_search" in tool_names + assert "answer" in tool_names + + def test_react_agent_with_mock_llm_engine(self, mock_tools, mock_llm_engine): + """Test ReactAgent with mocked LLM engine""" + tools = [mock_tools["google_search"], mock_tools["answer"]] + + with patch('agents.agents.agent_base.BaseAgent._setup_backend') as mock_setup: + mock_setup.return_value = None + + agent = ReactAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=tools, + template="qwen2.5", + backend="client" + ) + + # Mock the LLM engine + agent.llm_engine = mock_llm_engine + + # Test that the agent can be created and configured + assert agent.llm_engine is not None + assert hasattr(agent.llm_engine, 'generate_async') + + def test_react_agent_chain_generation_integration(self, mock_tools, mock_chain_generation): + """Test ReactAgent integration with chain generation methods""" + tools = [mock_tools["google_search"], mock_tools["answer"]] + + with patch('agents.agents.agent_base.BaseAgent._setup_backend') as mock_setup: + mock_setup.return_value = None + + agent = ReactAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=tools, + template="qwen2.5", + backend="client" + ) + + # Test that chain generation methods are 
available + assert hasattr(agent, 'run_async') + assert hasattr(agent, 'get_messages') + assert hasattr(agent, 'tokenize_trajectories') + + @pytest.mark.asyncio + async def test_react_agent_async_operations(self, mock_tools, mock_llm_engine): + """Test ReactAgent async operations with mocked dependencies""" + tools = [mock_tools["google_search"], mock_tools["answer"]] + + with patch('agents.agents.agent_base.BaseAgent._setup_backend') as mock_setup: + mock_setup.return_value = None + + agent = ReactAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=tools, + template="qwen2.5", + backend="client" + ) + + agent.llm_engine = mock_llm_engine + + # Mock the generate_async method + mock_llm_engine.generate_async.return_value = [ + "Thought: I need to search.\nAction: google_search\nInput: {\"query\": \"test\"}" + ] + + # Test that async generation can be called + result = await agent.llm_engine.generate_async(["test"]) + assert len(result) == 1 + assert "Thought:" in result[0] + assert "Action:" in result[0] + + def test_react_agent_error_handling(self, mock_tools): + """Test ReactAgent error handling in parsing""" + tools = [mock_tools["google_search"], mock_tools["answer"]] + agent = ReactAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=tools, + template="qwen2.5", + backend="client" + ) + + # Test with malformed JSON in input + malformed_responses = [ + """Thought: I need to search. 
+Action: google_search +Input: {"query": "test query""" # Missing closing } + ] + + result = agent.parse(malformed_responses, tools) + + assert len(result) == 1 + assert result[0]["role"] == "assistant" + # Should handle malformed input gracefully + assert len(result[0]["tool_calls"]) == 0 diff --git a/agents/tests/unit/agents/test_vision_agent.py b/agents/tests/unit/agents/test_vision_agent.py index 1759861..0f67ab5 100644 --- a/agents/tests/unit/agents/test_vision_agent.py +++ b/agents/tests/unit/agents/test_vision_agent.py @@ -1,21 +1,19 @@ from agents.agents.react.react_agent import ReactAgent -from agents.tools import google_search_serper, answer - +from agents.tools import answer_qa import pytest @pytest.mark.asyncio(loop_scope="session") async def test_vision_agent(): - tools = [google_search_serper, answer] + tools = [answer_qa] - task_info = "Use web search to get answers." + task_info = "Answer the question based on the image." react_agent = ReactAgent( "Qwen/Qwen2.5-VL-3B-Instruct", tools=tools, template="qwen2.5-vl", task_info=task_info, - backend="async_vllm", - debug=True + backend="async_vllm" ) messages = [ @@ -41,6 +39,9 @@ async def test_vision_agent(): start_messages=messages, num_chains=10 ) - - inputs = react_agent.tokenize_trajectories(return_action_mask=True) + messages_list = react_agent.get_messages() + messages = messages_list[0]['messages'] + for message in messages: + print(f"{message['role']}: {message['content']}") + inputs = react_agent.tokenize_trajectories() print(inputs) \ No newline at end of file diff --git a/agents/tests/unit/tools/test_code_tool.py b/agents/tests/unit/tools/test_code_tool.py index 80f25d2..388845f 100644 --- a/agents/tests/unit/tools/test_code_tool.py +++ b/agents/tests/unit/tools/test_code_tool.py @@ -45,14 +45,14 @@ async def test_double_release(): await code_interpreter.release(id="x") # must return instantly -@pytest.mark.asyncio(loop_scope="session") -async def test_global_clean(): +# 
@pytest.mark.asyncio(loop_scope="session") +# async def test_global_clean(): - async def one_chain(i): - await code_interpreter(id=f"c{i}", code="x=1") - # We don't release the env here, so it will be cleaned up automatically - # await code_interpreter.release_env(id=f"c{i}") +# async def one_chain(i): +# await code_interpreter(id=f"c{i}", code="x=1") +# # We don't release the env here, so it will be cleaned up automatically +# # await code_interpreter.release_env(id=f"c{i}") - await asyncio.gather(*[ - one_chain(i) for i in range(code_interpreter.pool_size-5) - ]) \ No newline at end of file +# await asyncio.gather(*[ +# one_chain(i) for i in range(code_interpreter.pool_size-5) +# ]) \ No newline at end of file diff --git a/verl b/verl index 861f63b..237d9ca 160000 --- a/verl +++ b/verl @@ -1 +1 @@ -Subproject commit 861f63ba8097a43ababe27116842512783080586 +Subproject commit 237d9cacd2ede001c21f1a1daa44e8e8598993e1