diff --git a/treesearch/minimal_agent.py b/treesearch/minimal_agent.py index 7a1f9d0..d83c618 100644 --- a/treesearch/minimal_agent.py +++ b/treesearch/minimal_agent.py @@ -2,6 +2,7 @@ import random from pathlib import Path from typing import Any, Optional +import asyncio import humanize @@ -513,8 +514,10 @@ async def score_code(self, node: Node, exec_result: ExecutionResult) -> Node: # Proceed with detailed scoring regardless of bug status logger.info("Proceeding with detailed scoring") - # Use the scoring system - for req in node.requirements: + # Score each requirement with an independent LLM call. + # Requirements are independent of each other, so we evaluate them + # concurrently with asyncio.gather to reduce wall-clock time. + async def _score_requirement(req: Requirement) -> None: logger.debug("Scoring requirement: %s", req.description) scoring_prompt: Prompt = { "Instructions": ( @@ -555,6 +558,8 @@ async def score_code(self, node: Node, exec_result: ExecutionResult) -> Node: req.is_fulfilled = False req.feedback = "No specific feedback provided." + await asyncio.gather(*(_score_requirement(req) for req in node.requirements)) + all_fulfilled = all(r.is_fulfilled for r in node.requirements) logger.debug("All requirements fulfilled=%s", all_fulfilled) if not node.is_buggy and all_fulfilled: