From ffdf504c72d32aaf91c9c03914c3eae8fcce6b4b Mon Sep 17 00:00:00 2001 From: cemde Date: Sun, 15 Feb 2026 23:58:54 +0100 Subject: [PATCH 1/2] fixed type hinting for better documentation --- docs/getting-started/faq.md | 2 +- docs/reference/environment.md | 2 -- docs/reference/user.md | 4 +--- maseval/benchmark/gaia2/gaia2.py | 2 +- maseval/benchmark/macs/macs.py | 7 ++++--- .../multiagentbench/multiagentbench.py | 2 +- maseval/benchmark/tau2/tau2.py | 7 ++++--- maseval/core/benchmark.py | 4 ++-- maseval/core/config.py | 2 +- maseval/core/tracing.py | 10 +++++----- maseval/core/user.py | 4 ++-- maseval/interface/agents/camel.py | 18 +++++++++--------- maseval/interface/agents/langgraph.py | 4 ++-- maseval/interface/agents/llamaindex.py | 6 +++--- maseval/interface/agents/smolagents.py | 2 +- 15 files changed, 37 insertions(+), 39 deletions(-) diff --git a/docs/getting-started/faq.md b/docs/getting-started/faq.md index 84121ab8..8408311f 100644 --- a/docs/getting-started/faq.md +++ b/docs/getting-started/faq.md @@ -12,7 +12,7 @@ Anyone! We had a few groups in mind when building MASEval. 1. Check this documentation. 2. If the feature does not exist, please [open an issue on GitHub](https://github.com/parameterlab/MASEval/issues/new). Feature requests are welcome. -3. Consider implementing it yourself. Check out the [contributing guide](contributing.md) for details. +3. Consider implementing it yourself. Check out the [contributing guide](https://github.com/parameterlab/MASEval/blob/main/CONTRIBUTING.md) for details. ## Q: Can I only test multi-agent systems? diff --git a/docs/reference/environment.md b/docs/reference/environment.md index 77d40e30..18d4e1bd 100644 --- a/docs/reference/environment.md +++ b/docs/reference/environment.md @@ -13,5 +13,3 @@ Some agent adapters expose helper tools or user-simulation tools that can be use [:material-github: View source](https://github.com/parameterlab/maseval/blob/main/maseval/interface/agents/smolagents.py){ .md-source-file } ::: maseval.interface.agents.smolagents.SmolAgentAdapter - -::: maseval.interface.agents.smolagents.SmolAgentLLMUser diff --git a/docs/reference/user.md b/docs/reference/user.md index c739ad25..fa525869 100644 --- a/docs/reference/user.md +++ b/docs/reference/user.md @@ -16,9 +16,7 @@ The `LLMUser` is initialized with a persona and a scenario, both of which are ty Some integrations provide convenience user/tool implementations for specific agent frameworks. For example: -[:material-github: View source](https://github.com/parameterlab/maseval/blob/main/maseval/interface/agents/smolagents.py){ .md-source-file } - -::: maseval.interface.agents.smolagents.SmolAgentLLMUser +See [SmolAgentLLMUser](../interface/agents/smolagents.md) in the smolagents integration documentation. [:material-github: View source](https://github.com/parameterlab/maseval/blob/main/maseval/interface/agents/langgraph.py){ .md-source-file } diff --git a/maseval/benchmark/gaia2/gaia2.py b/maseval/benchmark/gaia2/gaia2.py index b922ebae..7865a4e6 100644 --- a/maseval/benchmark/gaia2/gaia2.py +++ b/maseval/benchmark/gaia2/gaia2.py @@ -124,7 +124,7 @@ def __init__( fail_on_evaluation_error: bool = False, progress_bar: bool | str = True, seed: Optional[int] = None, - seed_generator=None, + seed_generator: Optional[SeedGenerator] = None, ): """Initialize benchmark with Gaia2-specific defaults. diff --git a/maseval/benchmark/macs/macs.py b/maseval/benchmark/macs/macs.py index 2ff1057d..71bdfe7b 100644 --- a/maseval/benchmark/macs/macs.py +++ b/maseval/benchmark/macs/macs.py @@ -65,6 +65,7 @@ def get_model_adapter(self, model_id, **kwargs): ) from maseval.core.config import ConfigurableMixin from maseval.core.tracing import TraceableMixin +from maseval.core.seeding import DefaultSeedGenerator # Statuses where agent is accountable (included in scoring) @@ -147,7 +148,7 @@ def _schema_to_inputs(schema: Dict[str, Any]) -> Dict[str, Any]: } return inputs - def __call__(self, **kwargs) -> str: + def __call__(self, **kwargs: Any) -> str: """Execute the tool with simulated response. Args: @@ -828,7 +829,7 @@ def setup_user( # type: ignore[invalid-method-override] agent_data: Dict[str, Any], environment: MACSEnvironment, task: Task, - seed_generator, + seed_generator: DefaultSeedGenerator, ) -> MACSUser: """Create MACS user simulator. @@ -872,7 +873,7 @@ def setup_agents( # type: ignore[invalid-method-override] environment: MACSEnvironment, task: Task, user: Optional[User], - seed_generator, + seed_generator: DefaultSeedGenerator, ) -> Tuple[Sequence[AgentAdapter], Dict[str, AgentAdapter]]: """Create agents for this task. Must be implemented by subclass. diff --git a/maseval/benchmark/multiagentbench/multiagentbench.py b/maseval/benchmark/multiagentbench/multiagentbench.py index 78f75bc7..f44c3e05 100644 --- a/maseval/benchmark/multiagentbench/multiagentbench.py +++ b/maseval/benchmark/multiagentbench/multiagentbench.py @@ -95,7 +95,7 @@ def __init__( fail_on_evaluation_error: bool = False, progress_bar: bool | str = True, seed: Optional[int] = None, - seed_generator=None, + seed_generator: Optional[SeedGenerator] = None, ): """Initialize the benchmark. diff --git a/maseval/benchmark/tau2/tau2.py b/maseval/benchmark/tau2/tau2.py index db6a9b9a..7da53af4 100644 --- a/maseval/benchmark/tau2/tau2.py +++ b/maseval/benchmark/tau2/tau2.py @@ -68,6 +68,7 @@ def get_model_adapter(self, model_id, **kwargs): from maseval import AgentAdapter, Benchmark, Evaluator, ModelAdapter, Task, User from maseval.core.user import AgenticLLMUser from maseval.core.callback import BenchmarkCallback +from maseval.core.seeding import DefaultSeedGenerator, SeedGenerator from maseval.benchmark.tau2.environment import Tau2Environment from maseval.benchmark.tau2.evaluator import Tau2Evaluator @@ -252,7 +253,7 @@ def __init__( fail_on_evaluation_error: bool = False, progress_bar: bool | str = True, seed: Optional[int] = None, - seed_generator=None, + seed_generator: Optional[SeedGenerator] = None, ): """Initialize benchmark with tau2-specific defaults. @@ -328,7 +329,7 @@ def setup_user( # type: ignore[override] agent_data: Dict[str, Any], environment: Tau2Environment, task: Task, - seed_generator, + seed_generator: DefaultSeedGenerator, ) -> Optional[User]: """Create Tau2 user simulator. @@ -964,7 +965,7 @@ def setup_agents( # type: ignore[invalid-method-override] environment: Tau2Environment, task: Task, user: Optional[User], - seed_generator, + seed_generator: DefaultSeedGenerator, ) -> Tuple[Sequence[AgentAdapter], Dict[str, AgentAdapter]]: """Create the default tau2 agent. diff --git a/maseval/core/benchmark.py b/maseval/core/benchmark.py index a69feb33..056f19e3 100644 --- a/maseval/core/benchmark.py +++ b/maseval/core/benchmark.py @@ -740,7 +740,7 @@ def setup_evaluators(self, environment, task, agents, user, seed_generator): pass @abstractmethod - def get_model_adapter(self, model_id: str, **kwargs) -> ModelAdapter: + def get_model_adapter(self, model_id: str, **kwargs: Any) -> ModelAdapter: """Provide a ModelAdapter for benchmark components that require LLM access. Many benchmark components beyond the agents themselves require access to language @@ -772,7 +772,7 @@ def get_model_adapter(self, model_id: str, **kwargs) -> ModelAdapter: For proper tracing, register the adapter after creation using the kwargs: ```python - def get_model_adapter(self, model_id: str, **kwargs) -> ModelAdapter: + def get_model_adapter(self, model_id: str, **kwargs: Any) -> ModelAdapter: adapter = GoogleGenAIModelAdapter(self.client, model_id=model_id) # Register for tracing if registration info provided diff --git a/maseval/core/config.py b/maseval/core/config.py index 2b2f104e..d7666598 100644 --- a/maseval/core/config.py +++ b/maseval/core/config.py @@ -57,7 +57,7 @@ def gather_config(self) -> Dict[str, Any]: task execution completes. The `gather_config()` method is called sequentially and should return static configuration data (not runtime state). - Attributes: + Note: Components should expose their configuration through instance variables or properties that can be accessed during configuration gathering. """ diff --git a/maseval/core/tracing.py b/maseval/core/tracing.py index 50408ea1..6ac77ac3 100644 --- a/maseval/core/tracing.py +++ b/maseval/core/tracing.py @@ -67,11 +67,11 @@ def gather_traces(self) -> Dict[str, Any]: traces during concurrent execution, but the `gather_traces()` method itself is called sequentially. - Attributes: - Components can store traces in any internal data structure. Common patterns: - - `self.logs = []` for invocation histories - - `self._messages = MessageHistory()` for conversations - - `self.logs = []` for simulator attempts + Note: + Components can store traces in any internal data structure. Common patterns + include `self.logs = []` for invocation histories, + `self._messages = MessageHistory()` for conversations, + and `self.logs = []` for simulator attempts. """ def gather_traces(self) -> Dict[str, Any]: diff --git a/maseval/core/user.py b/maseval/core/user.py index 850e0463..c940f140 100644 --- a/maseval/core/user.py +++ b/maseval/core/user.py @@ -2,7 +2,7 @@ from .simulator import UserLLMSimulator, AgenticUserLLMSimulator from .tracing import TraceableMixin from .config import ConfigurableMixin -from typing import Dict, Any, Optional, List, Callable +from typing import Any, Dict, Optional, List, Callable from abc import ABC, abstractmethod from datetime import datetime from enum import Enum @@ -455,7 +455,7 @@ def __init__( scenario: str, tools: Optional[Dict[str, Callable]] = None, max_internal_steps: int = 5, - **kwargs, + **kwargs: Any, ): """Initialize AgenticLLMUser. diff --git a/maseval/interface/agents/camel.py b/maseval/interface/agents/camel.py index 20cbf62e..6166d108 100644 --- a/maseval/interface/agents/camel.py +++ b/maseval/interface/agents/camel.py @@ -171,7 +171,7 @@ class CamelAgentAdapter(AgentAdapter): camel-ai to be installed: `pip install maseval[camel]` """ - def __init__(self, agent_instance, name: str, callbacks=None): + def __init__(self, agent_instance: Any, name: str, callbacks: Optional[List[Any]] = None): """Initialize the CAMEL adapter. Note: We don't call super().__init__() to avoid initializing self.logs as a list, @@ -619,7 +619,7 @@ class CamelLLMUser(LLMUser): ``` """ - def get_tool(self): + def get_tool(self) -> Any: """Get a CAMEL-compatible tool for user interaction. Returns a CAMEL FunctionTool that wraps the respond method, @@ -687,7 +687,7 @@ class CamelAgentUser(User): def __init__( self, - user_agent, + user_agent: Any, initial_query: str, name: str = "camel_agent_user", max_turns: int = 10, @@ -775,7 +775,7 @@ def is_done(self) -> bool: """ return self._turn_count >= self._max_turns - def get_tool(self): + def get_tool(self) -> Any: """Return a CAMEL FunctionTool for agent-to-user interaction. Returns: @@ -833,8 +833,8 @@ def gather_config(self) -> Dict[str, Any]: def camel_role_playing_execution_loop( - role_playing, - task, + role_playing: Any, + task: Any, max_steps: int = 10, tracer: Optional["CamelRolePlayingTracer"] = None, ) -> Any: @@ -959,7 +959,7 @@ def execution_loop(self, agents, task, environment, user): ``` """ - def __init__(self, role_playing, name: str = "role_playing"): + def __init__(self, role_playing: Any, name: str = "role_playing"): """Initialize the RolePlaying tracer. Args: @@ -973,7 +973,7 @@ def __init__(self, role_playing, name: str = "role_playing"): self._termination_reason: Optional[str] = None self._step_logs: List[Dict[str, Any]] = [] - def record_step(self, assistant_response, user_response) -> None: + def record_step(self, assistant_response: Any, user_response: Any) -> None: """Record data from a RolePlaying step. Call this after each role_playing.step() to track progress. @@ -1093,7 +1093,7 @@ def setup_agents(self, agent_data, environment, task, user): ``` """ - def __init__(self, workforce, name: str = "workforce"): + def __init__(self, workforce: Any, name: str = "workforce"): """Initialize the Workforce tracer. Args: diff --git a/maseval/interface/agents/langgraph.py b/maseval/interface/agents/langgraph.py index 73cc0201..5831c81d 100644 --- a/maseval/interface/agents/langgraph.py +++ b/maseval/interface/agents/langgraph.py @@ -6,7 +6,7 @@ import time from datetime import datetime -from typing import TYPE_CHECKING, Any, Dict +from typing import TYPE_CHECKING, Any, Dict, List, Optional from maseval import AgentAdapter, MessageHistory, LLMUser @@ -116,7 +116,7 @@ def chatbot(state: MessagesState): langgraph to be installed: `pip install maseval[langgraph]` """ - def __init__(self, agent_instance, name: str, callbacks=None, config=None): + def __init__(self, agent_instance: Any, name: str, callbacks: Optional[List[Any]] = None, config: Optional[Dict[str, Any]] = None): """Initialize the LangGraph adapter. Args: diff --git a/maseval/interface/agents/llamaindex.py b/maseval/interface/agents/llamaindex.py index 6c07fbc2..d8add62f 100644 --- a/maseval/interface/agents/llamaindex.py +++ b/maseval/interface/agents/llamaindex.py @@ -7,7 +7,7 @@ import asyncio import time from datetime import datetime -from typing import TYPE_CHECKING, Any, Dict, List +from typing import TYPE_CHECKING, Any, Dict, List, Optional from maseval import AgentAdapter, MessageHistory, LLMUser @@ -111,7 +111,7 @@ def search(query: str) -> str: llama-index-core to be installed: `pip install maseval[llamaindex]` """ - def __init__(self, agent_instance, name: str, callbacks=None): + def __init__(self, agent_instance: Any, name: str, callbacks: Optional[List[Any]] = None): """Initialize the LlamaIndex adapter. Args: @@ -447,7 +447,7 @@ class LlamaIndexLLMUser(LLMUser): ``` """ - def get_tool(self): + def get_tool(self) -> Any: """Get a LlamaIndex-compatible tool for user interaction. Returns: diff --git a/maseval/interface/agents/smolagents.py b/maseval/interface/agents/smolagents.py index b68d6dac..efcd6b7f 100644 --- a/maseval/interface/agents/smolagents.py +++ b/maseval/interface/agents/smolagents.py @@ -488,7 +488,7 @@ class SmolAgentLLMUser(LLMUser): ``` """ - def get_tool(self): + def get_tool(self) -> Any: """Get a smolagents-compatible tool for user interaction. Returns a `SmolAgentUserSimulationInputTool` instance that wraps this user From 25fd6bd2965499d66bb1f37b192e3f4ce555e4ce Mon Sep 17 00:00:00 2001 From: cemde Date: Mon, 16 Feb 2026 12:33:09 +0100 Subject: [PATCH 2/2] [skip ci] small doc fixes --- docs/reference/environment.md | 2 ++ docs/reference/user.md | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/reference/environment.md b/docs/reference/environment.md index 18d4e1bd..77d40e30 100644 --- a/docs/reference/environment.md +++ b/docs/reference/environment.md @@ -13,3 +13,5 @@ Some agent adapters expose helper tools or user-simulation tools that can be use [:material-github: View source](https://github.com/parameterlab/maseval/blob/main/maseval/interface/agents/smolagents.py){ .md-source-file } ::: maseval.interface.agents.smolagents.SmolAgentAdapter + +::: maseval.interface.agents.smolagents.SmolAgentLLMUser diff --git a/docs/reference/user.md b/docs/reference/user.md index fa525869..c739ad25 100644 --- a/docs/reference/user.md +++ b/docs/reference/user.md @@ -16,7 +16,9 @@ The `LLMUser` is initialized with a persona and a scenario, both of which are ty Some integrations provide convenience user/tool implementations for specific agent frameworks. For example: -See [SmolAgentLLMUser](../interface/agents/smolagents.md) in the smolagents integration documentation. +[:material-github: View source](https://github.com/parameterlab/maseval/blob/main/maseval/interface/agents/smolagents.py){ .md-source-file } + +::: maseval.interface.agents.smolagents.SmolAgentLLMUser [:material-github: View source](https://github.com/parameterlab/maseval/blob/main/maseval/interface/agents/langgraph.py){ .md-source-file }