parameterlab · cemde · Feb 16, 2026 · Feb 15, 2026 · Feb 16, 2026
diff --git a/docs/getting-started/faq.md b/docs/getting-started/faq.md
@@ -12,7 +12,7 @@ Anyone! We had a few groups in mind when building MASEval.
 
 1. Check this documentation.
 2. If the feature does not exist, please [open an issue on GitHub](https://github.com/parameterlab/MASEval/issues/new). Feature requests are welcome.
-3. Consider implementing it yourself. Check out the [contributing guide](contributing.md) for details.
+3. Consider implementing it yourself. Check out the [contributing guide](https://github.com/parameterlab/MASEval/blob/main/CONTRIBUTING.md) for details.
 
 ## Q: Can I only test multi-agent systems?
 

diff --git a/maseval/benchmark/gaia2/gaia2.py b/maseval/benchmark/gaia2/gaia2.py
@@ -124,7 +124,7 @@ def __init__(
         fail_on_evaluation_error: bool = False,
         progress_bar: bool | str = True,
         seed: Optional[int] = None,
-        seed_generator=None,
+        seed_generator: Optional[SeedGenerator] = None,
     ):
         """Initialize benchmark with Gaia2-specific defaults.
 

diff --git a/maseval/benchmark/macs/macs.py b/maseval/benchmark/macs/macs.py
@@ -65,6 +65,7 @@ def get_model_adapter(self, model_id, **kwargs):
 )
 from maseval.core.config import ConfigurableMixin
 from maseval.core.tracing import TraceableMixin
+from maseval.core.seeding import DefaultSeedGenerator
 
 
 # Statuses where agent is accountable (included in scoring)
@@ -147,7 +148,7 @@ def _schema_to_inputs(schema: Dict[str, Any]) -> Dict[str, Any]:
             }
         return inputs
 
-    def __call__(self, **kwargs) -> str:
+    def __call__(self, **kwargs: Any) -> str:
         """Execute the tool with simulated response.
 
         Args:
@@ -828,7 +829,7 @@ def setup_user(  # type: ignore[invalid-method-override]
         agent_data: Dict[str, Any],
         environment: MACSEnvironment,
         task: Task,
-        seed_generator,
+        seed_generator: DefaultSeedGenerator,
     ) -> MACSUser:
         """Create MACS user simulator.
 
@@ -872,7 +873,7 @@ def setup_agents(  # type: ignore[invalid-method-override]
         environment: MACSEnvironment,
         task: Task,
         user: Optional[User],
-        seed_generator,
+        seed_generator: DefaultSeedGenerator,
     ) -> Tuple[Sequence[AgentAdapter], Dict[str, AgentAdapter]]:
         """Create agents for this task. Must be implemented by subclass.
 

diff --git a/maseval/benchmark/multiagentbench/multiagentbench.py b/maseval/benchmark/multiagentbench/multiagentbench.py
@@ -95,7 +95,7 @@ def __init__(
         fail_on_evaluation_error: bool = False,
         progress_bar: bool | str = True,
         seed: Optional[int] = None,
-        seed_generator=None,
+        seed_generator: Optional[SeedGenerator] = None,
     ):
         """Initialize the benchmark.
 

diff --git a/maseval/benchmark/tau2/tau2.py b/maseval/benchmark/tau2/tau2.py
@@ -68,6 +68,7 @@ def get_model_adapter(self, model_id, **kwargs):
 from maseval import AgentAdapter, Benchmark, Evaluator, ModelAdapter, Task, User
 from maseval.core.user import AgenticLLMUser
 from maseval.core.callback import BenchmarkCallback
+from maseval.core.seeding import DefaultSeedGenerator, SeedGenerator
 
 from maseval.benchmark.tau2.environment import Tau2Environment
 from maseval.benchmark.tau2.evaluator import Tau2Evaluator
@@ -252,7 +253,7 @@ def __init__(
         fail_on_evaluation_error: bool = False,
         progress_bar: bool | str = True,
         seed: Optional[int] = None,
-        seed_generator=None,
+        seed_generator: Optional[SeedGenerator] = None,
     ):
         """Initialize benchmark with tau2-specific defaults.
 
@@ -328,7 +329,7 @@ def setup_user(  # type: ignore[override]
         agent_data: Dict[str, Any],
         environment: Tau2Environment,
         task: Task,
-        seed_generator,
+        seed_generator: DefaultSeedGenerator,
     ) -> Optional[User]:
         """Create Tau2 user simulator.
 
@@ -964,7 +965,7 @@ def setup_agents(  # type: ignore[invalid-method-override]
         environment: Tau2Environment,
         task: Task,
         user: Optional[User],
-        seed_generator,
+        seed_generator: DefaultSeedGenerator,
     ) -> Tuple[Sequence[AgentAdapter], Dict[str, AgentAdapter]]:
         """Create the default tau2 agent.
 

diff --git a/maseval/core/benchmark.py b/maseval/core/benchmark.py
@@ -740,7 +740,7 @@ def setup_evaluators(self, environment, task, agents, user, seed_generator):
         pass
 
     @abstractmethod
-    def get_model_adapter(self, model_id: str, **kwargs) -> ModelAdapter:
+    def get_model_adapter(self, model_id: str, **kwargs: Any) -> ModelAdapter:
         """Provide a ModelAdapter for benchmark components that require LLM access.
 
         Many benchmark components beyond the agents themselves require access to language
@@ -772,7 +772,7 @@ def get_model_adapter(self, model_id: str, **kwargs) -> ModelAdapter:
             For proper tracing, register the adapter after creation using the kwargs:
 
             ```python
-            def get_model_adapter(self, model_id: str, **kwargs) -> ModelAdapter:
+            def get_model_adapter(self, model_id: str, **kwargs: Any) -> ModelAdapter:
                 adapter = GoogleGenAIModelAdapter(self.client, model_id=model_id)
 
                 # Register for tracing if registration info provided

diff --git a/maseval/core/config.py b/maseval/core/config.py
@@ -57,7 +57,7 @@ def gather_config(self) -> Dict[str, Any]:
         task execution completes. The `gather_config()` method is called sequentially
         and should return static configuration data (not runtime state).
 
-    Attributes:
+    Note:
         Components should expose their configuration through instance variables or
         properties that can be accessed during configuration gathering.
     """

diff --git a/maseval/core/tracing.py b/maseval/core/tracing.py
@@ -67,11 +67,11 @@ def gather_traces(self) -> Dict[str, Any]:
         traces during concurrent execution, but the `gather_traces()` method
         itself is called sequentially.
 
-    Attributes:
-        Components can store traces in any internal data structure. Common patterns:
-        - `self.logs = []` for invocation histories
-        - `self._messages = MessageHistory()` for conversations
-        - `self.logs = []` for simulator attempts
+    Note:
+        Components can store traces in any internal data structure.
+        Common patterns include `self.logs = []` for invocation
+        histories or simulator attempts, and
+        `self._messages = MessageHistory()` for conversations.
     """
 
     def gather_traces(self) -> Dict[str, Any]:

diff --git a/maseval/core/user.py b/maseval/core/user.py
@@ -2,7 +2,7 @@
 from .simulator import UserLLMSimulator, AgenticUserLLMSimulator
 from .tracing import TraceableMixin
 from .config import ConfigurableMixin
-from typing import Dict, Any, Optional, List, Callable
+from typing import Any, Dict, Optional, List, Callable
 from abc import ABC, abstractmethod
 from datetime import datetime
 from enum import Enum
@@ -455,7 +455,7 @@ def __init__(
         scenario: str,
         tools: Optional[Dict[str, Callable]] = None,
         max_internal_steps: int = 5,
-        **kwargs,
+        **kwargs: Any,
     ):
         """Initialize AgenticLLMUser.
 

diff --git a/maseval/interface/agents/camel.py b/maseval/interface/agents/camel.py
@@ -171,7 +171,7 @@ class CamelAgentAdapter(AgentAdapter):
         camel-ai to be installed: `pip install maseval[camel]`
     """
 
-    def __init__(self, agent_instance, name: str, callbacks=None):
+    def __init__(self, agent_instance: Any, name: str, callbacks: Optional[List[Any]] = None):
         """Initialize the CAMEL adapter.
 
         Note: We don't call super().__init__() to avoid initializing self.logs as a list,
@@ -619,7 +619,7 @@ class CamelLLMUser(LLMUser):
         ```
     """
 
-    def get_tool(self):
+    def get_tool(self) -> Any:
         """Get a CAMEL-compatible tool for user interaction.
 
         Returns a CAMEL FunctionTool that wraps the respond method,
@@ -687,7 +687,7 @@ class CamelAgentUser(User):
 
     def __init__(
         self,
-        user_agent,
+        user_agent: Any,
         initial_query: str,
         name: str = "camel_agent_user",
         max_turns: int = 10,
@@ -775,7 +775,7 @@ def is_done(self) -> bool:
         """
         return self._turn_count >= self._max_turns
 
-    def get_tool(self):
+    def get_tool(self) -> Any:
         """Return a CAMEL FunctionTool for agent-to-user interaction.
 
         Returns:
@@ -833,8 +833,8 @@ def gather_config(self) -> Dict[str, Any]:
 
 
 def camel_role_playing_execution_loop(
-    role_playing,
-    task,
+    role_playing: Any,
+    task: Any,
     max_steps: int = 10,
     tracer: Optional["CamelRolePlayingTracer"] = None,
 ) -> Any:
@@ -959,7 +959,7 @@ def execution_loop(self, agents, task, environment, user):
         ```
     """
 
-    def __init__(self, role_playing, name: str = "role_playing"):
+    def __init__(self, role_playing: Any, name: str = "role_playing"):
         """Initialize the RolePlaying tracer.
 
         Args:
@@ -973,7 +973,7 @@ def __init__(self, role_playing, name: str = "role_playing"):
         self._termination_reason: Optional[str] = None
         self._step_logs: List[Dict[str, Any]] = []
 
-    def record_step(self, assistant_response, user_response) -> None:
+    def record_step(self, assistant_response: Any, user_response: Any) -> None:
         """Record data from a RolePlaying step.
 
         Call this after each role_playing.step() to track progress.
@@ -1093,7 +1093,7 @@ def setup_agents(self, agent_data, environment, task, user):
         ```
     """
 
-    def __init__(self, workforce, name: str = "workforce"):
+    def __init__(self, workforce: Any, name: str = "workforce"):
         """Initialize the Workforce tracer.
 
         Args:

diff --git a/maseval/interface/agents/langgraph.py b/maseval/interface/agents/langgraph.py
@@ -6,7 +6,7 @@
 
 import time
 from datetime import datetime
-from typing import TYPE_CHECKING, Any, Dict
+from typing import TYPE_CHECKING, Any, Dict, List, Optional
 
 from maseval import AgentAdapter, MessageHistory, LLMUser
 
@@ -116,7 +116,7 @@ def chatbot(state: MessagesState):
         langgraph to be installed: `pip install maseval[langgraph]`
     """
 
-    def __init__(self, agent_instance, name: str, callbacks=None, config=None):
+    def __init__(self, agent_instance: Any, name: str, callbacks: Optional[List[Any]] = None, config: Optional[Dict[str, Any]] = None):
         """Initialize the LangGraph adapter.
 
         Args:

diff --git a/maseval/interface/agents/llamaindex.py b/maseval/interface/agents/llamaindex.py
@@ -7,7 +7,7 @@
 import asyncio
 import time
 from datetime import datetime
-from typing import TYPE_CHECKING, Any, Dict, List
+from typing import TYPE_CHECKING, Any, Dict, List, Optional
 
 from maseval import AgentAdapter, MessageHistory, LLMUser
 
@@ -111,7 +111,7 @@ def search(query: str) -> str:
         llama-index-core to be installed: `pip install maseval[llamaindex]`
     """
 
-    def __init__(self, agent_instance, name: str, callbacks=None):
+    def __init__(self, agent_instance: Any, name: str, callbacks: Optional[List[Any]] = None):
         """Initialize the LlamaIndex adapter.
 
         Args:
@@ -447,7 +447,7 @@ class LlamaIndexLLMUser(LLMUser):
         ```
     """
 
-    def get_tool(self):
+    def get_tool(self) -> Any:
         """Get a LlamaIndex-compatible tool for user interaction.
 
         Returns:

diff --git a/maseval/interface/agents/smolagents.py b/maseval/interface/agents/smolagents.py
@@ -488,7 +488,7 @@ class SmolAgentLLMUser(LLMUser):
         ```
     """
 
-    def get_tool(self):
+    def get_tool(self) -> Any:
         """Get a smolagents-compatible tool for user interaction.
 
         Returns a `SmolAgentUserSimulationInputTool` instance that wraps this user