Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,15 @@ def generate_compose(
if mongo_dbs:
services["dab-mongo"] = {
"image": MONGO_IMAGE,
# mongo:8 intermittently SIGSEGVs (exit 139) on startup, and its
# WiredTiger cache auto-sizes to ~half of host RAM (≈7GB on a 15GB
# box), starving the agent. Cap the cache and let docker restart a
# crashed mongod: the data dir is already populated so the restart
# comes straight back up with data and the healthcheck recovers.
# Without this, a single crash bricks the trial — main's healthcheck
# fails for its whole retry window and the trial is cancelled.
"command": ["--wiredTigerCacheSizeGB", "1"],
"restart": "on-failure",
"healthcheck": {
"test": ["CMD", "mongosh", "--quiet", "--eval", "db.runCommand({ping:1})"],
"interval": "5s",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,14 @@ def _materialize_task_dir(
dataset=dataset_meta.name,
query_id=query_id,
)
# Some upstream validators do `from common_scaffold.validate.levenshtein
# import levenshtein`. verify.py loads validate.py via exec_module inside the
# dab-agent container, which has no common_scaffold installed, so the
# import raises, verify.py exits non-zero under `set -eu`, no reward.json
# is written, and harbor reports RewardFileNotFoundError (the verifier
# appears never to run). Vendor common_scaffold next to verify.py — /tests
# is sys.path[0] — so the import resolves. The batch path already does this.
_install_common_scaffold(tests_dir=tests_dir, data_root=dataset_dir.parent)

write_stratum_file(
tests_dir=tests_dir,
Expand Down
11 changes: 11 additions & 0 deletions packages/razorback-plugin-dab/tests/unit/test_compose_mongo.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,17 @@ def test_mongo_service_emitted(tmp_path: Path):
assert "mongosh" in services["dab-mongo"]["healthcheck"]["test"]


def test_mongo_has_restart_and_cache_cap(tmp_path: Path):
    """The generated dab-mongo service restarts on failure and caps its cache.

    mongo:8 intermittently SIGSEGVs on startup, and on a constrained host its
    default WiredTiger cache (~half of RAM) starves the agent. Without both
    settings a single crash bricks the trial via main's healthcheck retry
    window.
    """
    rendered = generate_compose(
        db_config=_AGNEWS_LIKE,
        dataset_name="agnews",
        data_root=tmp_path,
    )
    compose = yaml.safe_load(rendered)
    mongo_service = compose["services"]["dab-mongo"]

    # Restart policy first, then the explicit 1 GB WiredTiger cache ceiling.
    assert mongo_service["restart"] == "on-failure"
    assert mongo_service["command"] == ["--wiredTigerCacheSizeGB", "1"]


def test_main_depends_on_mongo(tmp_path: Path):
text = generate_compose(db_config=_AGNEWS_LIKE, dataset_name="agnews", data_root=tmp_path)
compose = yaml.safe_load(text)
Expand Down
60 changes: 60 additions & 0 deletions packages/razorback-plugin-dab/tests/unit/test_prepare_per_query.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# ABOUTME: AC-3 — task-dir shape for one (dataset, query) emission.
# ABOUTME: Asserts forbidden files (ground_truth.csv, validate.py) never reach workdir.

import importlib.util
from pathlib import Path

import pytest
Expand Down Expand Up @@ -74,6 +75,65 @@ def test_task_dir_layout(tmp_path: Path):
assert (task_dir / "steps" / "main" / "workdir" / "README.md").exists()


def _build_common_scaffold_data_root(root: Path) -> Path:
    """Create a single-query data root whose validator imports common_scaffold.

    The tree written under ``root / "data"`` is:

    * ``common_scaffold/validate/levenshtein.py`` -- a stub ``levenshtein``
      that returns 0 for equal arguments and 1 otherwise.
    * ``common_scaffold/validate/__pycache__/ignored.pyc`` -- a placeholder
      bytecode-cache entry.
    * ``query_PATENTS/db_description.txt`` plus ``query_PATENTS/query1/`` with
      a ``query.json`` and a ``validate.py`` that imports the stub.

    All text files are written as UTF-8 explicitly: the ``.py`` files are later
    read back by the import system, which decodes source as UTF-8, so the
    fixture must not depend on the platform's locale encoding.

    Args:
        root: Existing directory in which the ``data`` tree is created.

    Returns:
        The ``root / "data"`` directory.
    """
    data_root = root / "data"

    # Vendorable package: common_scaffold/validate/levenshtein.py
    scaffold_validate = data_root / "common_scaffold" / "validate"
    scaffold_validate.mkdir(parents=True)
    (scaffold_validate / "levenshtein.py").write_text(
        "def levenshtein(left, right):\n"
        "    return 0 if left == right else 1\n",
        encoding="utf-8",
    )

    # Bytecode-cache directory sitting next to the real source file.
    pycache_dir = scaffold_validate / "__pycache__"
    pycache_dir.mkdir()
    (pycache_dir / "ignored.pyc").write_bytes(b"cached")

    # Dataset directory holding exactly one query.
    qdir = data_root / "query_PATENTS"
    qdir.mkdir(parents=True)
    (qdir / "db_description.txt").write_text(
        "Synthetic affected DAB dataset.", encoding="utf-8"
    )

    q1 = qdir / "query1"
    q1.mkdir()
    (q1 / "query.json").write_text(
        '{"question": "Return abc."}', encoding="utf-8"
    )
    # The validator depends on the common_scaffold package created above.
    (q1 / "validate.py").write_text(
        "from common_scaffold.validate.levenshtein import levenshtein\n\n"
        "def validate(answer):\n"
        "    distance = levenshtein(answer, 'abc')\n"
        "    return (distance == 0, f'distance={distance}')\n",
        encoding="utf-8",
    )
    return data_root


def test_per_query_materializes_common_scaffold_for_upstream_validators(
    tmp_path: Path, monkeypatch
) -> None:
    """Per-query task dirs must vendor ``common_scaffold`` next to validate.py.

    Regression: 9 of 54 DAB validators do ``from common_scaffold.validate
    .levenshtein import levenshtein``. verify.py loads validate.py via
    exec_module in the dab-agent container (no common_scaffold installed), so
    without the vendored package the import raises, no reward.json is written,
    and harbor reports RewardFileNotFoundError (the verifier appears never to
    run). The batch path already vendored it; the per-query path must too.
    """
    # Function-scope import: only needed for the module-cache isolation below.
    import sys

    def _is_scaffold_module(name: str) -> bool:
        return name == "common_scaffold" or name.startswith("common_scaffold.")

    data_root = _build_common_scaffold_data_root(tmp_path)
    manifest = prepare_dataset_tasks(
        data_root=data_root,
        dataset="PATENTS",
        tasks_root=tmp_path / "tasks",
    )
    tests_dir = manifest[0]["task_dir"] / "tests"

    scaffold_validate = tests_dir / "common_scaffold" / "validate"
    assert (scaffold_validate / "levenshtein.py").exists()
    assert not (scaffold_validate / "__pycache__").exists()

    # Evict any common_scaffold modules cached by an earlier test so the import
    # below has to resolve against tests_dir; monkeypatch restores them at
    # teardown.
    for cached in [name for name in sys.modules if _is_scaffold_module(name)]:
        monkeypatch.delitem(sys.modules, cached)

    # The copied validator must import and run with /tests on sys.path
    # (verify.py runs as `python /tests/verify.py`, so /tests is sys.path[0]).
    monkeypatch.syspath_prepend(str(tests_dir))
    spec = importlib.util.spec_from_file_location(
        "_generated_validate_per_query", tests_dir / "validate.py"
    )
    assert spec is not None and spec.loader is not None
    module = importlib.util.module_from_spec(spec)
    try:
        spec.loader.exec_module(module)
        assert module.validate("abc") == (True, "distance=0")
    finally:
        # Do not leak this test's stub package into later tests via sys.modules.
        for leaked in [name for name in sys.modules if _is_scaffold_module(name)]:
            del sys.modules[leaked]


def test_compose_bind_mount_sources_resolve_to_real_files(tmp_path: Path):
"""PKG-13 T1 / AC-4 + PKG-14 AC-1: bind-mount sources in the generated
compose must point at existing files. Under PKG-14 bind mode (the default)
Expand Down