Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/getting-started/faq.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Anyone! We had a few groups in mind when building MASEval.

1. Check this documentation.
2. If the feature does not exist, please [open an issue on GitHub](https://github.com/parameterlab/MASEval/issues/new). Feature requests are welcome.
3. Consider implementing it yourself. Check out the [contributing guide](contributing.md) for details.
3. Consider implementing it yourself. Check out the [contributing guide](https://github.com/parameterlab/MASEval/blob/main/CONTRIBUTING.md) for details.

## Q: Can I only test multi-agent systems?

Expand Down
2 changes: 1 addition & 1 deletion maseval/benchmark/gaia2/gaia2.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def __init__(
fail_on_evaluation_error: bool = False,
progress_bar: bool | str = True,
seed: Optional[int] = None,
seed_generator=None,
seed_generator: Optional[SeedGenerator] = None,
):
"""Initialize benchmark with Gaia2-specific defaults.

Expand Down
7 changes: 4 additions & 3 deletions maseval/benchmark/macs/macs.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def get_model_adapter(self, model_id, **kwargs):
)
from maseval.core.config import ConfigurableMixin
from maseval.core.tracing import TraceableMixin
from maseval.core.seeding import DefaultSeedGenerator


# Statuses where agent is accountable (included in scoring)
Expand Down Expand Up @@ -147,7 +148,7 @@ def _schema_to_inputs(schema: Dict[str, Any]) -> Dict[str, Any]:
}
return inputs

def __call__(self, **kwargs) -> str:
def __call__(self, **kwargs: Any) -> str:
"""Execute the tool with simulated response.

Args:
Expand Down Expand Up @@ -828,7 +829,7 @@ def setup_user( # type: ignore[invalid-method-override]
agent_data: Dict[str, Any],
environment: MACSEnvironment,
task: Task,
seed_generator,
seed_generator: DefaultSeedGenerator,
) -> MACSUser:
"""Create MACS user simulator.

Expand Down Expand Up @@ -872,7 +873,7 @@ def setup_agents( # type: ignore[invalid-method-override]
environment: MACSEnvironment,
task: Task,
user: Optional[User],
seed_generator,
seed_generator: DefaultSeedGenerator,
) -> Tuple[Sequence[AgentAdapter], Dict[str, AgentAdapter]]:
"""Create agents for this task. Must be implemented by subclass.

Expand Down
2 changes: 1 addition & 1 deletion maseval/benchmark/multiagentbench/multiagentbench.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def __init__(
fail_on_evaluation_error: bool = False,
progress_bar: bool | str = True,
seed: Optional[int] = None,
seed_generator=None,
seed_generator: Optional[SeedGenerator] = None,
):
"""Initialize the benchmark.

Expand Down
7 changes: 4 additions & 3 deletions maseval/benchmark/tau2/tau2.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def get_model_adapter(self, model_id, **kwargs):
from maseval import AgentAdapter, Benchmark, Evaluator, ModelAdapter, Task, User
from maseval.core.user import AgenticLLMUser
from maseval.core.callback import BenchmarkCallback
from maseval.core.seeding import DefaultSeedGenerator, SeedGenerator

from maseval.benchmark.tau2.environment import Tau2Environment
from maseval.benchmark.tau2.evaluator import Tau2Evaluator
Expand Down Expand Up @@ -252,7 +253,7 @@ def __init__(
fail_on_evaluation_error: bool = False,
progress_bar: bool | str = True,
seed: Optional[int] = None,
seed_generator=None,
seed_generator: Optional[SeedGenerator] = None,
):
"""Initialize benchmark with tau2-specific defaults.

Expand Down Expand Up @@ -328,7 +329,7 @@ def setup_user( # type: ignore[override]
agent_data: Dict[str, Any],
environment: Tau2Environment,
task: Task,
seed_generator,
seed_generator: DefaultSeedGenerator,
) -> Optional[User]:
"""Create Tau2 user simulator.

Expand Down Expand Up @@ -964,7 +965,7 @@ def setup_agents( # type: ignore[invalid-method-override]
environment: Tau2Environment,
task: Task,
user: Optional[User],
seed_generator,
seed_generator: DefaultSeedGenerator,
) -> Tuple[Sequence[AgentAdapter], Dict[str, AgentAdapter]]:
"""Create the default tau2 agent.

Expand Down
4 changes: 2 additions & 2 deletions maseval/core/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -740,7 +740,7 @@ def setup_evaluators(self, environment, task, agents, user, seed_generator):
pass

@abstractmethod
def get_model_adapter(self, model_id: str, **kwargs) -> ModelAdapter:
def get_model_adapter(self, model_id: str, **kwargs: Any) -> ModelAdapter:
"""Provide a ModelAdapter for benchmark components that require LLM access.

Many benchmark components beyond the agents themselves require access to language
Expand Down Expand Up @@ -772,7 +772,7 @@ def get_model_adapter(self, model_id: str, **kwargs) -> ModelAdapter:
For proper tracing, register the adapter after creation using the kwargs:

```python
def get_model_adapter(self, model_id: str, **kwargs) -> ModelAdapter:
def get_model_adapter(self, model_id: str, **kwargs: Any) -> ModelAdapter:
adapter = GoogleGenAIModelAdapter(self.client, model_id=model_id)

# Register for tracing if registration info provided
Expand Down
2 changes: 1 addition & 1 deletion maseval/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def gather_config(self) -> Dict[str, Any]:
task execution completes. The `gather_config()` method is called sequentially
and should return static configuration data (not runtime state).

Attributes:
Note:
Components should expose their configuration through instance variables or
properties that can be accessed during configuration gathering.
"""
Expand Down
10 changes: 5 additions & 5 deletions maseval/core/tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,11 @@ def gather_traces(self) -> Dict[str, Any]:
traces during concurrent execution, but the `gather_traces()` method
itself is called sequentially.

Attributes:
Components can store traces in any internal data structure. Common patterns:
- `self.logs = []` for invocation histories
- `self._messages = MessageHistory()` for conversations
- `self.logs = []` for simulator attempts
Note:
Components can store traces in any internal data structure. Common patterns
include `self.logs = []` for invocation histories,
`self._messages = MessageHistory()` for conversations,
and `self.logs = []` for simulator attempts.
"""

def gather_traces(self) -> Dict[str, Any]:
Expand Down
4 changes: 2 additions & 2 deletions maseval/core/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from .simulator import UserLLMSimulator, AgenticUserLLMSimulator
from .tracing import TraceableMixin
from .config import ConfigurableMixin
from typing import Dict, Any, Optional, List, Callable
from typing import Any, Dict, Optional, List, Callable
from abc import ABC, abstractmethod
from datetime import datetime
from enum import Enum
Expand Down Expand Up @@ -455,7 +455,7 @@ def __init__(
scenario: str,
tools: Optional[Dict[str, Callable]] = None,
max_internal_steps: int = 5,
**kwargs,
**kwargs: Any,
):
"""Initialize AgenticLLMUser.

Expand Down
18 changes: 9 additions & 9 deletions maseval/interface/agents/camel.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ class CamelAgentAdapter(AgentAdapter):
camel-ai to be installed: `pip install maseval[camel]`
"""

def __init__(self, agent_instance, name: str, callbacks=None):
def __init__(self, agent_instance: Any, name: str, callbacks: Optional[List[Any]] = None):
"""Initialize the CAMEL adapter.

Note: We don't call super().__init__() to avoid initializing self.logs as a list,
Expand Down Expand Up @@ -619,7 +619,7 @@ class CamelLLMUser(LLMUser):
```
"""

def get_tool(self):
def get_tool(self) -> Any:
"""Get a CAMEL-compatible tool for user interaction.

Returns a CAMEL FunctionTool that wraps the respond method,
Expand Down Expand Up @@ -687,7 +687,7 @@ class CamelAgentUser(User):

def __init__(
self,
user_agent,
user_agent: Any,
initial_query: str,
name: str = "camel_agent_user",
max_turns: int = 10,
Expand Down Expand Up @@ -775,7 +775,7 @@ def is_done(self) -> bool:
"""
return self._turn_count >= self._max_turns

def get_tool(self):
def get_tool(self) -> Any:
"""Return a CAMEL FunctionTool for agent-to-user interaction.

Returns:
Expand Down Expand Up @@ -833,8 +833,8 @@ def gather_config(self) -> Dict[str, Any]:


def camel_role_playing_execution_loop(
role_playing,
task,
role_playing: Any,
task: Any,
max_steps: int = 10,
tracer: Optional["CamelRolePlayingTracer"] = None,
) -> Any:
Expand Down Expand Up @@ -959,7 +959,7 @@ def execution_loop(self, agents, task, environment, user):
```
"""

def __init__(self, role_playing, name: str = "role_playing"):
def __init__(self, role_playing: Any, name: str = "role_playing"):
"""Initialize the RolePlaying tracer.

Args:
Expand All @@ -973,7 +973,7 @@ def __init__(self, role_playing, name: str = "role_playing"):
self._termination_reason: Optional[str] = None
self._step_logs: List[Dict[str, Any]] = []

def record_step(self, assistant_response, user_response) -> None:
def record_step(self, assistant_response: Any, user_response: Any) -> None:
"""Record data from a RolePlaying step.

Call this after each role_playing.step() to track progress.
Expand Down Expand Up @@ -1093,7 +1093,7 @@ def setup_agents(self, agent_data, environment, task, user):
```
"""

def __init__(self, workforce, name: str = "workforce"):
def __init__(self, workforce: Any, name: str = "workforce"):
"""Initialize the Workforce tracer.

Args:
Expand Down
4 changes: 2 additions & 2 deletions maseval/interface/agents/langgraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import time
from datetime import datetime
from typing import TYPE_CHECKING, Any, Dict
from typing import TYPE_CHECKING, Any, Dict, List, Optional

from maseval import AgentAdapter, MessageHistory, LLMUser

Expand Down Expand Up @@ -116,7 +116,7 @@ def chatbot(state: MessagesState):
langgraph to be installed: `pip install maseval[langgraph]`
"""

def __init__(self, agent_instance, name: str, callbacks=None, config=None):
def __init__(self, agent_instance: Any, name: str, callbacks: Optional[List[Any]] = None, config: Optional[Dict[str, Any]] = None):
"""Initialize the LangGraph adapter.

Args:
Expand Down
6 changes: 3 additions & 3 deletions maseval/interface/agents/llamaindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import asyncio
import time
from datetime import datetime
from typing import TYPE_CHECKING, Any, Dict, List
from typing import TYPE_CHECKING, Any, Dict, List, Optional

from maseval import AgentAdapter, MessageHistory, LLMUser

Expand Down Expand Up @@ -111,7 +111,7 @@ def search(query: str) -> str:
llama-index-core to be installed: `pip install maseval[llamaindex]`
"""

def __init__(self, agent_instance, name: str, callbacks=None):
def __init__(self, agent_instance: Any, name: str, callbacks: Optional[List[Any]] = None):
"""Initialize the LlamaIndex adapter.

Args:
Expand Down Expand Up @@ -447,7 +447,7 @@ class LlamaIndexLLMUser(LLMUser):
```
"""

def get_tool(self):
def get_tool(self) -> Any:
"""Get a LlamaIndex-compatible tool for user interaction.

Returns:
Expand Down
2 changes: 1 addition & 1 deletion maseval/interface/agents/smolagents.py
Original file line number Diff line number Diff line change
Expand Up @@ -488,7 +488,7 @@ class SmolAgentLLMUser(LLMUser):
```
"""

def get_tool(self):
def get_tool(self) -> Any:
"""Get a smolagents-compatible tool for user interaction.

Returns a `SmolAgentUserSimulationInputTool` instance that wraps this user
Expand Down