From 53c6312adbfd37723eef83856485a32f01e830c9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20D=C3=BCck?= <florian.dueck@student.uni-siegen.de>
Date: Sun, 28 Jun 2026 10:20:06 +0200
Subject: [PATCH] feat: add support for running the same prompt multiple times
 with separate output directories

---
 README.md                         |  10 +++
 main.py                           | 107 ++++++++++++++++++++++--------
 treesearch/utils/costs_tracker.py |   5 ++
 utils/log.py                      |   7 ++
 utils/statistics_tracker.py       |   6 ++
 5 files changed, 108 insertions(+), 27 deletions(-)

diff --git a/README.md b/README.md
index 1222b80..1c6e840 100644
--- a/README.md
+++ b/README.md
@@ -177,6 +177,16 @@ uv run main.py --list-models
 uv run main.py --model "gpt-4o"
 ```
 
+**Run the same prompt multiple times:**
+```bash
+uv run main.py --prompt "Build a recommender..." --runs 10
+```
+With `--runs` greater than 1, each run is stored in its own numbered subfolder
+inside the out directory (`out/run_01`, `out/run_02`, ... `out/run_10`).
+Re-running continues the numbering after the highest existing `run_*` folder,
+so previous results are never overwritten. With `--runs 1` (the default) the
+output is written directly to `out/` as before.
+
 ## Embeddings / documentation index
 
 AutoRecLab uses FAISS vector stores in `ragEmbeddings/` for docs-aware coding.
diff --git a/main.py b/main.py
index 3f503f2..8fa2d8d 100644
--- a/main.py
+++ b/main.py
@@ -19,12 +19,12 @@ async def main():
     set_log_level(os.getenv("ISGSA_LOG", "INFO"))
 
     config = get_config()
-    out_dir = mkdir(config.out_dir)
+    base_out_dir = mkdir(config.out_dir)
     args = get_args()
 
     #Init workspace
     if args.init:
-        mkdir(out_dir / "workspace")
+        mkdir(base_out_dir / "workspace")
         return
 
 
@@ -46,38 +46,85 @@ async def main():
         config.agent.code = config.agent.code.model_copy(update={"model": args.model})
 
 
-    # Prepare to run AutoRecLab
-    attach_file_handler(out_dir)
-    cost_tracker.set_out_dir(out_dir)
-    statistics_tracker.set_out_dir(out_dir)
-    require_executable("dot")
+    # Get user request (read once, reused for every run)
+    user_request = get_user_request(args)
+
+    if user_request is None or user_request.strip() == "":
+        logger.error("No request provided. Please provide a prompt using --prompt or --prompt-file, or type it manually.")
+        return
+
+
+    # Validate the number of runs
+    num_runs = args.runs
+    if num_runs < 1:
+        logger.error("--runs must be a positive integer (got %s).", num_runs)
+        return
+
+
+    # Run AutoRecLab once or multiple times with the same prompt
+    if num_runs == 1:
+        await run_once(config, base_out_dir, user_request, args)
+    else:
+        start_index = next_run_index(base_out_dir)
+        pad_width = max(2, len(str(start_index + num_runs - 1)))
+
+        for offset in range(num_runs):
+            run_number = start_index + offset
+            run_dir = mkdir(base_out_dir / f"run_{run_number:0{pad_width}d}")
+
+            logger.info(
+                f"===== Starting run {offset + 1}/{num_runs} "
+                f"(out dir: {run_dir}) ====="
+            )
 
+            # Each run gets its own out_dir and a fresh tracker state
+            config.out_dir = str(run_dir)
+            await run_once(config, run_dir, user_request, args)
 
-    # Get user request
-    user_request = None
+        logger.info(f"Finished all {num_runs} runs in {base_out_dir}")
 
+
+def get_user_request(args) -> str | None:
     if args.prompt is not None:
-        user_request = args.prompt
+        return args.prompt
 
-    elif args.prompt_file is not None:
+    if args.prompt_file is not None:
         with open(args.prompt_file, "r", encoding="utf-8") as f:
-            user_request = f.read().strip()
+            return f.read().strip()
 
-    else:
-        user_req_lines: list[str] = []
-        print('Enter you request, write "!start" to start:')
-        while True:
-            line = input("> ")
-            if line.lower().strip().startswith("!start"):
-                break
-            user_req_lines.append(line)
+    user_req_lines: list[str] = []
+    print('Enter your request, write "!start" to start:')
+    while True:
+        line = input("> ")
+        if line.lower().strip().startswith("!start"):
+            break
+        user_req_lines.append(line)
 
-        user_request = "\n".join(user_req_lines)
+    return "\n".join(user_req_lines)
 
-    if user_request is None or user_request.strip() == "":
-        logger.error("No request provided. Please provide a prompt using --prompt or --prompt-file, or type it manually.")
-        return
-    
+
+def next_run_index(base_out_dir) -> int:
+    """Return the next available run number based on existing run_* folders."""
+    max_index = 0
+    for entry in base_out_dir.glob("run_*"):
+        if not entry.is_dir():
+            continue
+        suffix = entry.name[len("run_"):]
+        if suffix.isdigit():
+            max_index = max(max_index, int(suffix))
+    return max_index + 1
+
+
+async def run_once(config, out_dir, user_request: str, args):
+    # Start each run from a clean tracker state
+    cost_tracker.reset()
+    statistics_tracker.reset()
+
+    # Prepare to run AutoRecLab
+    attach_file_handler(out_dir)
+    cost_tracker.set_out_dir(out_dir)
+    statistics_tracker.set_out_dir(out_dir)
+    require_executable("dot")
 
     # Log the user request
     if not args.prompt_no_log:
@@ -85,7 +132,6 @@ async def main():
         with open(prompt_file, "w", encoding="utf-8") as f:
             f.write(user_request)
 
-
     # Start AutoRecLab
     logger.info("Starting AutoRecLab...")
     logger.debug(f"User request:\n{user_request}")
@@ -93,7 +139,6 @@ async def main():
     await ts._async_init()
     await ts.run()
 
-
     # Summarize results
     cost_tracker.saveSummarized()
     statistics_tracker.summarize_statistics()
@@ -108,6 +153,14 @@ def get_args():
     parser.add_argument("--list-datasets", action="store_true")
     parser.add_argument("--list-models", action="store_true")
     parser.add_argument("--model", type=str, default=None)
+    parser.add_argument(
+        "--runs",
+        type=int,
+        default=1,
+        help="How often to run the program with the same prompt. With more than "
+        "one run, each run is stored in its own numbered subfolder "
+        "(run_01, run_02, ...) inside the out directory.",
+    )
 
     return parser.parse_args()
 
diff --git a/treesearch/utils/costs_tracker.py b/treesearch/utils/costs_tracker.py
index 76d031b..edf7f20 100644
--- a/treesearch/utils/costs_tracker.py
+++ b/treesearch/utils/costs_tracker.py
@@ -93,6 +93,11 @@ def __init__(self):
         self.costsList = []
         self.out_dir = None
 
+    def reset(self):
+        """Reset the tracker state so it can be reused for a fresh run."""
+        self.costsList = []
+        self.out_dir = None
+
     def saveSummarized(self):
         if self.out_dir is not None:
             with open(self.out_dir / "costs_log.csv", "a") as f:
diff --git a/utils/log.py b/utils/log.py
index 09f3aa1..bcb29b4 100644
--- a/utils/log.py
+++ b/utils/log.py
@@ -34,6 +34,13 @@ def set_log_level(level: str):
 
 
 def attach_file_handler(file_log_dir: Path, level=logging.DEBUG):
+    # Remove any previously attached file handlers so each run logs to its own
+    # debug.log instead of accumulating handlers across multiple runs.
+    for handler in list(_ROOT_LOGGER.handlers):
+        if isinstance(handler, logging.FileHandler):
+            _ROOT_LOGGER.removeHandler(handler)
+            handler.close()
+
     file_log_dir.mkdir(exist_ok=True, parents=True)
     file_handler = logging.FileHandler(file_log_dir / "debug.log", encoding="utf-8")
     file_handler.setLevel(logging.DEBUG)
diff --git a/utils/statistics_tracker.py b/utils/statistics_tracker.py
index d8b6526..5fbce5d 100644
--- a/utils/statistics_tracker.py
+++ b/utils/statistics_tracker.py
@@ -146,6 +146,12 @@ def __init__(self):
         self.checkpoint_dir = None
         self.nodes_ordered = []
 
+    def reset(self):
+        """Reset the tracker state so it can be reused for a fresh run."""
+        self.out_dir = None
+        self.checkpoint_dir = None
+        self.nodes_ordered = []
+
     def set_out_dir(self, out_dir):
         self.checkpoint_dir = os.path.join(out_dir, "checkpoint")